xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision 048b5e81bd0d19d36c4c9e5c927f0f784ef4a6b1)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
383287d42SBarry Smith /*
483287d42SBarry Smith     Factorization code for BAIJ format.
583287d42SBarry Smith */
67c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
7c60f0209SBarry Smith #include "../src/mat/blockinvert.h"
883287d42SBarry Smith 
9db4efbfdSBarry Smith #undef __FUNCT__
104dd39f65SShri Abhyankar #define __FUNCT__ "MatSeqBAIJSetNumericFactorization"
11db4efbfdSBarry Smith /*
12db4efbfdSBarry Smith    This is used to set the numeric factorization for both LU and ILU symbolic factorization
13db4efbfdSBarry Smith */
144dd39f65SShri Abhyankar PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscTruth natural)
15ae3d28f0SHong Zhang {
16ae3d28f0SHong Zhang   PetscFunctionBegin;
176929473cSShri Abhyankar   if(natural){
186929473cSShri Abhyankar     switch (fact->rmap->bs){
19*048b5e81SShri Abhyankar     case 1:
20*048b5e81SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21*048b5e81SShri Abhyankar       break;
226929473cSShri Abhyankar     case 2:
234dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
246929473cSShri Abhyankar       break;
256929473cSShri Abhyankar     case 3:
264dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
276929473cSShri Abhyankar       break;
286929473cSShri Abhyankar     case 4:
294dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
306929473cSShri Abhyankar       break;
316929473cSShri Abhyankar     case 5:
324dd39f65SShri Abhyankar        fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
336929473cSShri Abhyankar        break;
346929473cSShri Abhyankar     case 6:
354dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
366929473cSShri Abhyankar       break;
376929473cSShri Abhyankar     case 7:
384dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
396929473cSShri Abhyankar       break;
4029a97285SShri Abhyankar     case 15:
4129a97285SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
4229a97285SShri Abhyankar       break;
436929473cSShri Abhyankar     default:
444dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
456929473cSShri Abhyankar       break;
466929473cSShri Abhyankar     }
476929473cSShri Abhyankar   }
486929473cSShri Abhyankar   else{
49ae3d28f0SHong Zhang     switch (fact->rmap->bs){
50*048b5e81SShri Abhyankar     case 1:
51*048b5e81SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
52*048b5e81SShri Abhyankar       break;
53ae3d28f0SHong Zhang     case 2:
544dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
55ae3d28f0SHong Zhang       break;
56ae3d28f0SHong Zhang     case 3:
574dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
58ae3d28f0SHong Zhang       break;
59ae3d28f0SHong Zhang     case 4:
604dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
61ae3d28f0SHong Zhang       break;
62ae3d28f0SHong Zhang     case 5:
634dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
64ae3d28f0SHong Zhang       break;
65ae3d28f0SHong Zhang     case 6:
664dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
67ae3d28f0SHong Zhang       break;
68ae3d28f0SHong Zhang     case 7:
694dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
70ae3d28f0SHong Zhang       break;
71ae3d28f0SHong Zhang     default:
724dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
73ae3d28f0SHong Zhang       break;
74ae3d28f0SHong Zhang     }
756929473cSShri Abhyankar   }
76ae3d28f0SHong Zhang   PetscFunctionReturn(0);
77ae3d28f0SHong Zhang }
78ae3d28f0SHong Zhang 
798b1456e3SHong Zhang PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscTruth natural)
80db4efbfdSBarry Smith {
81db4efbfdSBarry Smith   PetscFunctionBegin;
82db4efbfdSBarry Smith   if (natural) {
83db4efbfdSBarry Smith     switch (inA->rmap->bs) {
84db4efbfdSBarry Smith     case 1:
8506e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
86db4efbfdSBarry Smith       break;
87db4efbfdSBarry Smith     case 2:
8806e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
89db4efbfdSBarry Smith       break;
90db4efbfdSBarry Smith     case 3:
9106e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
92db4efbfdSBarry Smith       break;
93db4efbfdSBarry Smith     case 4:
9465460251SBarry Smith #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
95db4efbfdSBarry Smith       {
96db4efbfdSBarry Smith         PetscTruth  sse_enabled_local;
97db4efbfdSBarry Smith         PetscErrorCode ierr;
98db4efbfdSBarry Smith         ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr);
99db4efbfdSBarry Smith         if (sse_enabled_local) {
100db4efbfdSBarry Smith #  if defined(PETSC_HAVE_SSE)
101db4efbfdSBarry Smith           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
102db4efbfdSBarry Smith           if (n==(unsigned short)n) {
103db4efbfdSBarry Smith             unsigned short *aj=(unsigned short *)AJ;
104db4efbfdSBarry Smith             for (i=0;i<nz;i++) {
105db4efbfdSBarry Smith               aj[i] = (unsigned short)AJ[i];
106db4efbfdSBarry Smith             }
107db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
108db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
109db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr);
110db4efbfdSBarry Smith           } else {
111db4efbfdSBarry Smith         /* Scale the column indices for easier indexing in MatSolve. */
112db4efbfdSBarry Smith /*            for (i=0;i<nz;i++) { */
113db4efbfdSBarry Smith /*              AJ[i] = AJ[i]*4; */
114db4efbfdSBarry Smith /*            } */
115db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
117db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr);
118db4efbfdSBarry Smith           }
119db4efbfdSBarry Smith #  else
120db4efbfdSBarry Smith         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
121e32f2f54SBarry Smith           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
122db4efbfdSBarry Smith #  endif
123db4efbfdSBarry Smith         } else {
12406e38f1dSHong Zhang           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
125db4efbfdSBarry Smith         }
126db4efbfdSBarry Smith       }
127db4efbfdSBarry Smith #else
12806e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
129db4efbfdSBarry Smith #endif
130db4efbfdSBarry Smith       break;
131db4efbfdSBarry Smith     case 5:
13206e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
133db4efbfdSBarry Smith       break;
134db4efbfdSBarry Smith     case 6:
13506e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
136db4efbfdSBarry Smith       break;
137db4efbfdSBarry Smith     case 7:
13806e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
139db4efbfdSBarry Smith       break;
140db4efbfdSBarry Smith     default:
14106e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
142db4efbfdSBarry Smith       break;
143db4efbfdSBarry Smith     }
144db4efbfdSBarry Smith   } else {
145db4efbfdSBarry Smith     switch (inA->rmap->bs) {
146db4efbfdSBarry Smith     case 1:
14706e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
148db4efbfdSBarry Smith       break;
149db4efbfdSBarry Smith     case 2:
15006e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
151db4efbfdSBarry Smith       break;
152db4efbfdSBarry Smith     case 3:
15306e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
154db4efbfdSBarry Smith       break;
155db4efbfdSBarry Smith     case 4:
15606e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
157db4efbfdSBarry Smith       break;
158db4efbfdSBarry Smith     case 5:
15906e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
160db4efbfdSBarry Smith       break;
161db4efbfdSBarry Smith     case 6:
16206e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
163db4efbfdSBarry Smith       break;
164db4efbfdSBarry Smith     case 7:
16506e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
166db4efbfdSBarry Smith       break;
167db4efbfdSBarry Smith     default:
16806e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
169db4efbfdSBarry Smith       break;
170db4efbfdSBarry Smith     }
171db4efbfdSBarry Smith   }
172db4efbfdSBarry Smith   PetscFunctionReturn(0);
173db4efbfdSBarry Smith }
174db4efbfdSBarry Smith 
17583287d42SBarry Smith /*
17683287d42SBarry Smith     The symbolic factorization code is identical to that for AIJ format,
17783287d42SBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
17883287d42SBarry Smith   NOT good code reuse.
17983287d42SBarry Smith */
180c7272abeSHong Zhang #include "petscbt.h"
181c7272abeSHong Zhang #include "../src/mat/utils/freespace.h"
182c7272abeSHong Zhang 
1834a2ae208SSatish Balay #undef __FUNCT__
1844dd39f65SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ"
1854dd39f65SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
18683287d42SBarry Smith {
18783287d42SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
188c7272abeSHong Zhang   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
189c7272abeSHong Zhang   PetscTruth         row_identity,col_identity,both_identity;
19083287d42SBarry Smith   IS                 isicol;
1916849ba73SBarry Smith   PetscErrorCode     ierr;
192c7272abeSHong Zhang   const PetscInt     *r,*ic;
193c7272abeSHong Zhang   PetscInt           i,*ai=a->i,*aj=a->j;
194c7272abeSHong Zhang   PetscInt           *bi,*bj,*ajtmp;
195c7272abeSHong Zhang   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
196c7272abeSHong Zhang   PetscReal          f;
197c7272abeSHong Zhang   PetscInt           nlnk,*lnk,k,**bi_ptr;
198c7272abeSHong Zhang   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
199c7272abeSHong Zhang   PetscBT            lnkbt;
20083287d42SBarry Smith 
20183287d42SBarry Smith   PetscFunctionBegin;
202e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
20383287d42SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
20483287d42SBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
20583287d42SBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
20683287d42SBarry Smith 
207bc74c81fSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
208bc74c81fSJed Brown   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
209bc74c81fSJed Brown   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
210a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
211b2b2dd24SShri Abhyankar 
212b2b2dd24SShri Abhyankar   /* linked list for storing column indices of the active row */
213b2b2dd24SShri Abhyankar   nlnk = n + 1;
214b2b2dd24SShri Abhyankar   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
215b2b2dd24SShri Abhyankar 
216b2b2dd24SShri Abhyankar   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
217b2b2dd24SShri Abhyankar 
218b2b2dd24SShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
219b2b2dd24SShri Abhyankar   f = info->fill;
220b2b2dd24SShri Abhyankar   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
221b2b2dd24SShri Abhyankar   current_space = free_space;
222b2b2dd24SShri Abhyankar 
223b2b2dd24SShri Abhyankar   for (i=0; i<n; i++) {
224b2b2dd24SShri Abhyankar     /* copy previous fill into linked list */
225b2b2dd24SShri Abhyankar     nzi = 0;
226b2b2dd24SShri Abhyankar     nnz = ai[r[i]+1] - ai[r[i]];
227e32f2f54SBarry Smith     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
228b2b2dd24SShri Abhyankar     ajtmp = aj + ai[r[i]];
229b2b2dd24SShri Abhyankar     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
230b2b2dd24SShri Abhyankar     nzi += nlnk;
231b2b2dd24SShri Abhyankar 
232b2b2dd24SShri Abhyankar     /* add pivot rows into linked list */
233b2b2dd24SShri Abhyankar     row = lnk[n];
234b2b2dd24SShri Abhyankar     while (row < i) {
235b2b2dd24SShri Abhyankar       nzbd    = bdiag[row] + 1; /* num of entries in the row with column index <= row */
236b2b2dd24SShri Abhyankar       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
237b2b2dd24SShri Abhyankar       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
238b2b2dd24SShri Abhyankar       nzi += nlnk;
239b2b2dd24SShri Abhyankar       row  = lnk[row];
240b2b2dd24SShri Abhyankar     }
241b2b2dd24SShri Abhyankar     bi[i+1] = bi[i] + nzi;
242b2b2dd24SShri Abhyankar     im[i]   = nzi;
243b2b2dd24SShri Abhyankar 
244b2b2dd24SShri Abhyankar     /* mark bdiag */
245b2b2dd24SShri Abhyankar     nzbd = 0;
246b2b2dd24SShri Abhyankar     nnz  = nzi;
247b2b2dd24SShri Abhyankar     k    = lnk[n];
248b2b2dd24SShri Abhyankar     while (nnz-- && k < i){
249b2b2dd24SShri Abhyankar       nzbd++;
250b2b2dd24SShri Abhyankar       k = lnk[k];
251b2b2dd24SShri Abhyankar     }
2522ce24eb6SHong Zhang     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
253b2b2dd24SShri Abhyankar 
254b2b2dd24SShri Abhyankar     /* if free space is not available, make more free space */
255b2b2dd24SShri Abhyankar     if (current_space->local_remaining<nzi) {
256b2b2dd24SShri Abhyankar       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
257b2b2dd24SShri Abhyankar       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
258b2b2dd24SShri Abhyankar       reallocs++;
259b2b2dd24SShri Abhyankar     }
260b2b2dd24SShri Abhyankar 
261b2b2dd24SShri Abhyankar     /* copy data into free space, then initialize lnk */
262b2b2dd24SShri Abhyankar     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
263b2b2dd24SShri Abhyankar     bi_ptr[i] = current_space->array;
264b2b2dd24SShri Abhyankar     current_space->array           += nzi;
265b2b2dd24SShri Abhyankar     current_space->local_used      += nzi;
266b2b2dd24SShri Abhyankar     current_space->local_remaining -= nzi;
267b2b2dd24SShri Abhyankar   }
268b2b2dd24SShri Abhyankar #if defined(PETSC_USE_INFO)
269b2b2dd24SShri Abhyankar   if (ai[n] != 0) {
270b2b2dd24SShri Abhyankar     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
271b2b2dd24SShri Abhyankar     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
272b2b2dd24SShri Abhyankar     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
273b2b2dd24SShri Abhyankar     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
274b2b2dd24SShri Abhyankar     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
275b2b2dd24SShri Abhyankar   } else {
276b2b2dd24SShri Abhyankar     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
277b2b2dd24SShri Abhyankar   }
278b2b2dd24SShri Abhyankar #endif
279b2b2dd24SShri Abhyankar 
280b2b2dd24SShri Abhyankar   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
281b2b2dd24SShri Abhyankar   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
282b2b2dd24SShri Abhyankar 
283b2b2dd24SShri Abhyankar   /* destroy list of free space and other temporary array(s) */
284b2b2dd24SShri Abhyankar   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
2852ce24eb6SHong Zhang   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
286b2b2dd24SShri Abhyankar   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
287b2b2dd24SShri Abhyankar   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
288b2b2dd24SShri Abhyankar 
289b2b2dd24SShri Abhyankar   /* put together the new matrix */
290b2b2dd24SShri Abhyankar   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
291b2b2dd24SShri Abhyankar   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
292b2b2dd24SShri Abhyankar   b    = (Mat_SeqBAIJ*)(B)->data;
293b2b2dd24SShri Abhyankar   b->free_a       = PETSC_TRUE;
294b2b2dd24SShri Abhyankar   b->free_ij      = PETSC_TRUE;
295b2b2dd24SShri Abhyankar   b->singlemalloc = PETSC_FALSE;
296b2b2dd24SShri Abhyankar   ierr          = PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
297b2b2dd24SShri Abhyankar   b->j          = bj;
298b2b2dd24SShri Abhyankar   b->i          = bi;
299b2b2dd24SShri Abhyankar   b->diag       = bdiag;
300b2b2dd24SShri Abhyankar   b->free_diag  = PETSC_TRUE;
301b2b2dd24SShri Abhyankar   b->ilen       = 0;
302b2b2dd24SShri Abhyankar   b->imax       = 0;
303b2b2dd24SShri Abhyankar   b->row        = isrow;
304b2b2dd24SShri Abhyankar   b->col        = iscol;
305b2b2dd24SShri Abhyankar   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
306b2b2dd24SShri Abhyankar   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
307b2b2dd24SShri Abhyankar   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
308b2b2dd24SShri Abhyankar   b->icol       = isicol;
309b2b2dd24SShri Abhyankar   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
310b2b2dd24SShri Abhyankar   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
311b2b2dd24SShri Abhyankar 
312b2b2dd24SShri Abhyankar   b->maxnz = b->nz = bdiag[0]+1;
313d5f3da31SBarry Smith   B->factortype            =  MAT_FACTOR_LU;
314b2b2dd24SShri Abhyankar   B->info.factor_mallocs   = reallocs;
315b2b2dd24SShri Abhyankar   B->info.fill_ratio_given = f;
316b2b2dd24SShri Abhyankar 
317b2b2dd24SShri Abhyankar   if (ai[n] != 0) {
318b2b2dd24SShri Abhyankar     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
319b2b2dd24SShri Abhyankar   } else {
320b2b2dd24SShri Abhyankar     B->info.fill_ratio_needed = 0.0;
321b2b2dd24SShri Abhyankar   }
322b2b2dd24SShri Abhyankar 
323b2b2dd24SShri Abhyankar   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
324b2b2dd24SShri Abhyankar   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
325b2b2dd24SShri Abhyankar   both_identity = (PetscTruth) (row_identity && col_identity);
3264dd39f65SShri Abhyankar   ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr);
327b2b2dd24SShri Abhyankar   PetscFunctionReturn(0);
328b2b2dd24SShri Abhyankar  }
329b2b2dd24SShri Abhyankar 
330b2b2dd24SShri Abhyankar #undef __FUNCT__
33106e38f1dSHong Zhang #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_inplace"
33206e38f1dSHong Zhang PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
333faca2338SShri Abhyankar {
334faca2338SShri Abhyankar   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
335faca2338SShri Abhyankar   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
336faca2338SShri Abhyankar   PetscTruth         row_identity,col_identity,both_identity;
337faca2338SShri Abhyankar   IS                 isicol;
338faca2338SShri Abhyankar   PetscErrorCode     ierr;
339faca2338SShri Abhyankar   const PetscInt     *r,*ic;
340faca2338SShri Abhyankar   PetscInt           i,*ai=a->i,*aj=a->j;
341faca2338SShri Abhyankar   PetscInt           *bi,*bj,*ajtmp;
342faca2338SShri Abhyankar   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
343faca2338SShri Abhyankar   PetscReal          f;
344faca2338SShri Abhyankar   PetscInt           nlnk,*lnk,k,**bi_ptr;
345faca2338SShri Abhyankar   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
346faca2338SShri Abhyankar   PetscBT            lnkbt;
347faca2338SShri Abhyankar 
348faca2338SShri Abhyankar   PetscFunctionBegin;
349e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
350faca2338SShri Abhyankar   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
351faca2338SShri Abhyankar   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
352faca2338SShri Abhyankar   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
353faca2338SShri Abhyankar 
354bc74c81fSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
355bc74c81fSJed Brown   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
356bc74c81fSJed Brown   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
357bc74c81fSJed Brown 
358a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
359c7272abeSHong Zhang 
360c7272abeSHong Zhang   /* linked list for storing column indices of the active row */
361c7272abeSHong Zhang   nlnk = n + 1;
362c7272abeSHong Zhang   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
363c7272abeSHong Zhang 
364c7272abeSHong Zhang   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
365c7272abeSHong Zhang 
366c7272abeSHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
367c7272abeSHong Zhang   f = info->fill;
368c7272abeSHong Zhang   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
369c7272abeSHong Zhang   current_space = free_space;
37083287d42SBarry Smith 
37183287d42SBarry Smith   for (i=0; i<n; i++) {
372c7272abeSHong Zhang     /* copy previous fill into linked list */
37383287d42SBarry Smith     nzi = 0;
374c7272abeSHong Zhang     nnz = ai[r[i]+1] - ai[r[i]];
375e32f2f54SBarry Smith     if (!nnz) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
376c7272abeSHong Zhang     ajtmp = aj + ai[r[i]];
377c7272abeSHong Zhang     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
378c7272abeSHong Zhang     nzi += nlnk;
379c7272abeSHong Zhang 
380c7272abeSHong Zhang     /* add pivot rows into linked list */
381c7272abeSHong Zhang     row = lnk[n];
382c7272abeSHong Zhang     while (row < i) {
383c7272abeSHong Zhang       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
384c7272abeSHong Zhang       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
385c7272abeSHong Zhang       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
386c7272abeSHong Zhang       nzi += nlnk;
387c7272abeSHong Zhang       row  = lnk[row];
38883287d42SBarry Smith     }
389c7272abeSHong Zhang     bi[i+1] = bi[i] + nzi;
390c7272abeSHong Zhang     im[i]   = nzi;
391c7272abeSHong Zhang 
392c7272abeSHong Zhang     /* mark bdiag */
393c7272abeSHong Zhang     nzbd = 0;
394c7272abeSHong Zhang     nnz  = nzi;
395c7272abeSHong Zhang     k    = lnk[n];
396c7272abeSHong Zhang     while (nnz-- && k < i){
397c7272abeSHong Zhang       nzbd++;
398c7272abeSHong Zhang       k = lnk[k];
399c7272abeSHong Zhang     }
400c7272abeSHong Zhang     bdiag[i] = bi[i] + nzbd;
401c7272abeSHong Zhang 
402c7272abeSHong Zhang     /* if free space is not available, make more free space */
403c7272abeSHong Zhang     if (current_space->local_remaining<nzi) {
404c7272abeSHong Zhang       nnz = (n - i)*nzi; /* estimated and max additional space needed */
405c7272abeSHong Zhang       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
406c7272abeSHong Zhang       reallocs++;
40783287d42SBarry Smith     }
40883287d42SBarry Smith 
409c7272abeSHong Zhang     /* copy data into free space, then initialize lnk */
410c7272abeSHong Zhang     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
411c7272abeSHong Zhang     bi_ptr[i] = current_space->array;
412c7272abeSHong Zhang     current_space->array           += nzi;
413c7272abeSHong Zhang     current_space->local_used      += nzi;
414c7272abeSHong Zhang     current_space->local_remaining -= nzi;
415c7272abeSHong Zhang   }
4166cf91177SBarry Smith #if defined(PETSC_USE_INFO)
41783287d42SBarry Smith   if (ai[n] != 0) {
418c7272abeSHong Zhang     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
419ae15b995SBarry Smith     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
420ae15b995SBarry Smith     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
421ae15b995SBarry Smith     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
422ae15b995SBarry Smith     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
42383287d42SBarry Smith   } else {
424c7272abeSHong Zhang     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
42583287d42SBarry Smith   }
42663ba0a88SBarry Smith #endif
42783287d42SBarry Smith 
42883287d42SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
42983287d42SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
43083287d42SBarry Smith 
431c7272abeSHong Zhang   /* destroy list of free space and other temporary array(s) */
432c7272abeSHong Zhang   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
433c7272abeSHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
434c7272abeSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
435c7272abeSHong Zhang   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
43683287d42SBarry Smith 
43783287d42SBarry Smith   /* put together the new matrix */
438719d5645SBarry Smith   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
439719d5645SBarry Smith   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
440719d5645SBarry Smith   b    = (Mat_SeqBAIJ*)(B)->data;
441e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
442e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
443c7272abeSHong Zhang   b->singlemalloc = PETSC_FALSE;
444c7272abeSHong Zhang   ierr          = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
445c7272abeSHong Zhang   b->j          = bj;
446c7272abeSHong Zhang   b->i          = bi;
447c7272abeSHong Zhang   b->diag       = bdiag;
4487f53bb6cSHong Zhang   b->free_diag  = PETSC_TRUE;
44983287d42SBarry Smith   b->ilen       = 0;
45083287d42SBarry Smith   b->imax       = 0;
45183287d42SBarry Smith   b->row        = isrow;
45283287d42SBarry Smith   b->col        = iscol;
453d3d32019SBarry Smith   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
45483287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
45583287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
45683287d42SBarry Smith   b->icol       = isicol;
45787828ca2SBarry Smith   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
458c7272abeSHong Zhang   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
45983287d42SBarry Smith 
460c7272abeSHong Zhang   b->maxnz = b->nz = bi[n] ;
461d5f3da31SBarry Smith   (B)->factortype            =  MAT_FACTOR_LU;
462719d5645SBarry Smith   (B)->info.factor_mallocs   = reallocs;
463719d5645SBarry Smith   (B)->info.fill_ratio_given = f;
464c7272abeSHong Zhang 
46583287d42SBarry Smith   if (ai[n] != 0) {
466c7272abeSHong Zhang     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
46783287d42SBarry Smith   } else {
468719d5645SBarry Smith     (B)->info.fill_ratio_needed = 0.0;
46983287d42SBarry Smith   }
470c7272abeSHong Zhang 
471db4efbfdSBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
472db4efbfdSBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
4737d18ce8fSMatthew Knepley   both_identity = (PetscTruth) (row_identity && col_identity);
4748b1456e3SHong Zhang   ierr = MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);CHKERRQ(ierr);
47583287d42SBarry Smith   PetscFunctionReturn(0);
47683287d42SBarry Smith  }
477db4efbfdSBarry Smith 
478