xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision ae3d28f0b2735ca96b8c68dee0ca72021f7b1688)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
383287d42SBarry Smith /*
483287d42SBarry Smith     Factorization code for BAIJ format.
583287d42SBarry Smith */
67c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
7c60f0209SBarry Smith #include "../src/mat/blockinvert.h"
883287d42SBarry Smith 
9db4efbfdSBarry Smith #undef __FUNCT__
10*ae3d28f0SHong Zhang #define __FUNCT__ "MatSeqBAIJSetNumericFactorization_newdatastruct"
11db4efbfdSBarry Smith /*
12db4efbfdSBarry Smith    This is used to set the numeric factorization for both LU and ILU symbolic factorization
13db4efbfdSBarry Smith */
14*ae3d28f0SHong Zhang PetscErrorCode MatSeqBAIJSetNumericFactorization_newdatastruct(Mat fact,PetscTruth natural)
15*ae3d28f0SHong Zhang {
16*ae3d28f0SHong Zhang   PetscFunctionBegin;
17*ae3d28f0SHong Zhang   switch (fact->rmap->bs){
18*ae3d28f0SHong Zhang   case 2:
19*ae3d28f0SHong Zhang     fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct;
20*ae3d28f0SHong Zhang     break;
21*ae3d28f0SHong Zhang   case 3:
22*ae3d28f0SHong Zhang     fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct;
23*ae3d28f0SHong Zhang     break;
24*ae3d28f0SHong Zhang   case 4:
25*ae3d28f0SHong Zhang     fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct;
26*ae3d28f0SHong Zhang     break;
27*ae3d28f0SHong Zhang   case 5:
28*ae3d28f0SHong Zhang     fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct;
29*ae3d28f0SHong Zhang     break;
30*ae3d28f0SHong Zhang   case 6:
31*ae3d28f0SHong Zhang     fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct;
32*ae3d28f0SHong Zhang     break;
33*ae3d28f0SHong Zhang   case 7:
34*ae3d28f0SHong Zhang     fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct;
35*ae3d28f0SHong Zhang     break;
36*ae3d28f0SHong Zhang   default:
37*ae3d28f0SHong Zhang     fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct;
38*ae3d28f0SHong Zhang     break;
39*ae3d28f0SHong Zhang   }
40*ae3d28f0SHong Zhang   PetscFunctionReturn(0);
41*ae3d28f0SHong Zhang }
42*ae3d28f0SHong Zhang 
43db4efbfdSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural)
44db4efbfdSBarry Smith {
45db4efbfdSBarry Smith   PetscFunctionBegin;
46db4efbfdSBarry Smith   if (natural) {
47db4efbfdSBarry Smith     switch (inA->rmap->bs) {
48db4efbfdSBarry Smith     case 1:
49db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
50db4efbfdSBarry Smith       break;
51db4efbfdSBarry Smith     case 2:
52db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
53db4efbfdSBarry Smith       break;
54db4efbfdSBarry Smith     case 3:
55db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
56db4efbfdSBarry Smith       break;
57db4efbfdSBarry Smith     case 4:
5865460251SBarry Smith #if defined(PETSC_USE_SCALAR_MAT_SINGLE)
59db4efbfdSBarry Smith       {
60db4efbfdSBarry Smith         PetscTruth  sse_enabled_local;
61db4efbfdSBarry Smith         PetscErrorCode ierr;
62db4efbfdSBarry Smith         ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr);
63db4efbfdSBarry Smith         if (sse_enabled_local) {
64db4efbfdSBarry Smith #  if defined(PETSC_HAVE_SSE)
65db4efbfdSBarry Smith           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
66db4efbfdSBarry Smith           if (n==(unsigned short)n) {
67db4efbfdSBarry Smith             unsigned short *aj=(unsigned short *)AJ;
68db4efbfdSBarry Smith             for (i=0;i<nz;i++) {
69db4efbfdSBarry Smith               aj[i] = (unsigned short)AJ[i];
70db4efbfdSBarry Smith             }
71db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
72db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
73db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr);
74db4efbfdSBarry Smith           } else {
75db4efbfdSBarry Smith         /* Scale the column indices for easier indexing in MatSolve. */
76db4efbfdSBarry Smith /*            for (i=0;i<nz;i++) { */
77db4efbfdSBarry Smith /*              AJ[i] = AJ[i]*4; */
78db4efbfdSBarry Smith /*            } */
79db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
80db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
81db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr);
82db4efbfdSBarry Smith           }
83db4efbfdSBarry Smith #  else
84db4efbfdSBarry Smith         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
85db4efbfdSBarry Smith           SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable");
86db4efbfdSBarry Smith #  endif
87db4efbfdSBarry Smith         } else {
88db4efbfdSBarry Smith           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
89db4efbfdSBarry Smith         }
90db4efbfdSBarry Smith       }
91db4efbfdSBarry Smith #else
92db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
93db4efbfdSBarry Smith #endif
94db4efbfdSBarry Smith       break;
95db4efbfdSBarry Smith     case 5:
96db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
97db4efbfdSBarry Smith       break;
98db4efbfdSBarry Smith     case 6:
99db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
100db4efbfdSBarry Smith       break;
101db4efbfdSBarry Smith     case 7:
102db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
103db4efbfdSBarry Smith       break;
104db4efbfdSBarry Smith     default:
105db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
106db4efbfdSBarry Smith       break;
107db4efbfdSBarry Smith     }
108db4efbfdSBarry Smith   } else {
109db4efbfdSBarry Smith     switch (inA->rmap->bs) {
110db4efbfdSBarry Smith     case 1:
111db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
112db4efbfdSBarry Smith       break;
113db4efbfdSBarry Smith     case 2:
114db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
115db4efbfdSBarry Smith       break;
116db4efbfdSBarry Smith     case 3:
117db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
118db4efbfdSBarry Smith       break;
119db4efbfdSBarry Smith     case 4:
120db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
121db4efbfdSBarry Smith       break;
122db4efbfdSBarry Smith     case 5:
123db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
124db4efbfdSBarry Smith       break;
125db4efbfdSBarry Smith     case 6:
126db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
127db4efbfdSBarry Smith       break;
128db4efbfdSBarry Smith     case 7:
129db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
130db4efbfdSBarry Smith       break;
131db4efbfdSBarry Smith     default:
132db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
133db4efbfdSBarry Smith       break;
134db4efbfdSBarry Smith     }
135db4efbfdSBarry Smith   }
136db4efbfdSBarry Smith   PetscFunctionReturn(0);
137db4efbfdSBarry Smith }
138db4efbfdSBarry Smith 
13983287d42SBarry Smith /*
14083287d42SBarry Smith     The symbolic factorization code is identical to that for AIJ format,
14183287d42SBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
14283287d42SBarry Smith   NOT good code reuse.
14383287d42SBarry Smith */
144c7272abeSHong Zhang #include "petscbt.h"
145c7272abeSHong Zhang #include "../src/mat/utils/freespace.h"
146c7272abeSHong Zhang 
1474a2ae208SSatish Balay #undef __FUNCT__
148faca2338SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct"
149faca2338SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
15083287d42SBarry Smith {
15183287d42SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
152c7272abeSHong Zhang   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
153c7272abeSHong Zhang   PetscTruth         row_identity,col_identity,both_identity;
15483287d42SBarry Smith   IS                 isicol;
1556849ba73SBarry Smith   PetscErrorCode     ierr;
156c7272abeSHong Zhang   const PetscInt     *r,*ic;
157c7272abeSHong Zhang   PetscInt           i,*ai=a->i,*aj=a->j;
158c7272abeSHong Zhang   PetscInt           *bi,*bj,*ajtmp;
159c7272abeSHong Zhang   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
160c7272abeSHong Zhang   PetscReal          f;
161c7272abeSHong Zhang   PetscInt           nlnk,*lnk,k,**bi_ptr;
162c7272abeSHong Zhang   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
163c7272abeSHong Zhang   PetscBT            lnkbt;
16483287d42SBarry Smith 
16583287d42SBarry Smith   PetscFunctionBegin;
166d0f46423SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
16783287d42SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
16883287d42SBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
16983287d42SBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
17083287d42SBarry Smith 
171bc74c81fSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
172bc74c81fSJed Brown   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
173bc74c81fSJed Brown   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
174a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
175b2b2dd24SShri Abhyankar 
176b2b2dd24SShri Abhyankar   /* linked list for storing column indices of the active row */
177b2b2dd24SShri Abhyankar   nlnk = n + 1;
178b2b2dd24SShri Abhyankar   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
179b2b2dd24SShri Abhyankar 
180b2b2dd24SShri Abhyankar   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
181b2b2dd24SShri Abhyankar 
182b2b2dd24SShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
183b2b2dd24SShri Abhyankar   f = info->fill;
184b2b2dd24SShri Abhyankar   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
185b2b2dd24SShri Abhyankar   current_space = free_space;
186b2b2dd24SShri Abhyankar 
187b2b2dd24SShri Abhyankar   for (i=0; i<n; i++) {
188b2b2dd24SShri Abhyankar     /* copy previous fill into linked list */
189b2b2dd24SShri Abhyankar     nzi = 0;
190b2b2dd24SShri Abhyankar     nnz = ai[r[i]+1] - ai[r[i]];
191b2b2dd24SShri Abhyankar     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
192b2b2dd24SShri Abhyankar     ajtmp = aj + ai[r[i]];
193b2b2dd24SShri Abhyankar     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
194b2b2dd24SShri Abhyankar     nzi += nlnk;
195b2b2dd24SShri Abhyankar 
196b2b2dd24SShri Abhyankar     /* add pivot rows into linked list */
197b2b2dd24SShri Abhyankar     row = lnk[n];
198b2b2dd24SShri Abhyankar     while (row < i) {
199b2b2dd24SShri Abhyankar       nzbd    = bdiag[row] + 1; /* num of entries in the row with column index <= row */
200b2b2dd24SShri Abhyankar       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
201b2b2dd24SShri Abhyankar       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
202b2b2dd24SShri Abhyankar       nzi += nlnk;
203b2b2dd24SShri Abhyankar       row  = lnk[row];
204b2b2dd24SShri Abhyankar     }
205b2b2dd24SShri Abhyankar     bi[i+1] = bi[i] + nzi;
206b2b2dd24SShri Abhyankar     im[i]   = nzi;
207b2b2dd24SShri Abhyankar 
208b2b2dd24SShri Abhyankar     /* mark bdiag */
209b2b2dd24SShri Abhyankar     nzbd = 0;
210b2b2dd24SShri Abhyankar     nnz  = nzi;
211b2b2dd24SShri Abhyankar     k    = lnk[n];
212b2b2dd24SShri Abhyankar     while (nnz-- && k < i){
213b2b2dd24SShri Abhyankar       nzbd++;
214b2b2dd24SShri Abhyankar       k = lnk[k];
215b2b2dd24SShri Abhyankar     }
216b2b2dd24SShri Abhyankar     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU_v2() */
217b2b2dd24SShri Abhyankar 
218b2b2dd24SShri Abhyankar     /* if free space is not available, make more free space */
219b2b2dd24SShri Abhyankar     if (current_space->local_remaining<nzi) {
220b2b2dd24SShri Abhyankar       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
221b2b2dd24SShri Abhyankar       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
222b2b2dd24SShri Abhyankar       reallocs++;
223b2b2dd24SShri Abhyankar     }
224b2b2dd24SShri Abhyankar 
225b2b2dd24SShri Abhyankar     /* copy data into free space, then initialize lnk */
226b2b2dd24SShri Abhyankar     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
227b2b2dd24SShri Abhyankar     bi_ptr[i] = current_space->array;
228b2b2dd24SShri Abhyankar     current_space->array           += nzi;
229b2b2dd24SShri Abhyankar     current_space->local_used      += nzi;
230b2b2dd24SShri Abhyankar     current_space->local_remaining -= nzi;
231b2b2dd24SShri Abhyankar   }
232b2b2dd24SShri Abhyankar #if defined(PETSC_USE_INFO)
233b2b2dd24SShri Abhyankar   if (ai[n] != 0) {
234b2b2dd24SShri Abhyankar     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
235b2b2dd24SShri Abhyankar     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
236b2b2dd24SShri Abhyankar     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
237b2b2dd24SShri Abhyankar     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
238b2b2dd24SShri Abhyankar     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
239b2b2dd24SShri Abhyankar   } else {
240b2b2dd24SShri Abhyankar     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
241b2b2dd24SShri Abhyankar   }
242b2b2dd24SShri Abhyankar #endif
243b2b2dd24SShri Abhyankar 
244b2b2dd24SShri Abhyankar   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
245b2b2dd24SShri Abhyankar   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
246b2b2dd24SShri Abhyankar 
247b2b2dd24SShri Abhyankar   /* destroy list of free space and other temporary array(s) */
248b2b2dd24SShri Abhyankar   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
249b2b2dd24SShri Abhyankar   ierr = PetscFreeSpaceContiguous_LU_v2(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
250b2b2dd24SShri Abhyankar   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
251b2b2dd24SShri Abhyankar   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
252b2b2dd24SShri Abhyankar 
253b2b2dd24SShri Abhyankar   /* put together the new matrix */
254b2b2dd24SShri Abhyankar   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
255b2b2dd24SShri Abhyankar   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
256b2b2dd24SShri Abhyankar   b    = (Mat_SeqBAIJ*)(B)->data;
257b2b2dd24SShri Abhyankar   b->free_a       = PETSC_TRUE;
258b2b2dd24SShri Abhyankar   b->free_ij      = PETSC_TRUE;
259b2b2dd24SShri Abhyankar   b->singlemalloc = PETSC_FALSE;
260b2b2dd24SShri Abhyankar   ierr          = PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
261b2b2dd24SShri Abhyankar   b->j          = bj;
262b2b2dd24SShri Abhyankar   b->i          = bi;
263b2b2dd24SShri Abhyankar   b->diag       = bdiag;
264b2b2dd24SShri Abhyankar   b->free_diag  = PETSC_TRUE;
265b2b2dd24SShri Abhyankar   b->ilen       = 0;
266b2b2dd24SShri Abhyankar   b->imax       = 0;
267b2b2dd24SShri Abhyankar   b->row        = isrow;
268b2b2dd24SShri Abhyankar   b->col        = iscol;
269b2b2dd24SShri Abhyankar   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
270b2b2dd24SShri Abhyankar   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
271b2b2dd24SShri Abhyankar   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
272b2b2dd24SShri Abhyankar   b->icol       = isicol;
273b2b2dd24SShri Abhyankar   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
274b2b2dd24SShri Abhyankar   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
275b2b2dd24SShri Abhyankar 
276b2b2dd24SShri Abhyankar   b->maxnz = b->nz = bdiag[0]+1;
277b2b2dd24SShri Abhyankar   B->factor                =  MAT_FACTOR_LU;
278b2b2dd24SShri Abhyankar   B->info.factor_mallocs   = reallocs;
279b2b2dd24SShri Abhyankar   B->info.fill_ratio_given = f;
280b2b2dd24SShri Abhyankar 
281b2b2dd24SShri Abhyankar   if (ai[n] != 0) {
282b2b2dd24SShri Abhyankar     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
283b2b2dd24SShri Abhyankar   } else {
284b2b2dd24SShri Abhyankar     B->info.fill_ratio_needed = 0.0;
285b2b2dd24SShri Abhyankar   }
286b2b2dd24SShri Abhyankar 
287b2b2dd24SShri Abhyankar   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
288b2b2dd24SShri Abhyankar   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
289b2b2dd24SShri Abhyankar   both_identity = (PetscTruth) (row_identity && col_identity);
290b2b2dd24SShri Abhyankar   if(both_identity){
291b2b2dd24SShri Abhyankar      switch(bs){
292b2b2dd24SShri Abhyankar      case 2:
293c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct;
294b2b2dd24SShri Abhyankar         B->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2;
295b2b2dd24SShri Abhyankar         break;
296b2b2dd24SShri Abhyankar      case 3:
297c0c7eb62SShri Abhyankar 	B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct;
298b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2;
299b2b2dd24SShri Abhyankar         break;
300b2b2dd24SShri Abhyankar      case 4:
301c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct;
302b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2;
303b2b2dd24SShri Abhyankar         break;
304b2b2dd24SShri Abhyankar      case 5:
305c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct;
30653cca76cSShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct_v2;
307b2b2dd24SShri Abhyankar         break;
308b2b2dd24SShri Abhyankar      case 6:
309c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct;
31053cca76cSShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct_v2;
311b2b2dd24SShri Abhyankar         break;
312b2b2dd24SShri Abhyankar      case 7:
313c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct;
31453cca76cSShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct_v2;
315b2b2dd24SShri Abhyankar         break;
316b2b2dd24SShri Abhyankar      default:
317c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct;
3181a83e813SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct_v2;
319b2b2dd24SShri Abhyankar       }
320b2b2dd24SShri Abhyankar    }
321b2b2dd24SShri Abhyankar    else {
322b2b2dd24SShri Abhyankar      switch(bs){
323b2b2dd24SShri Abhyankar      case 2:
324c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct;
3250c4413a7SShri Abhyankar         B->ops->solve           = MatSolve_SeqBAIJ_2_newdatastruct_v2;
326b2b2dd24SShri Abhyankar         break;
327b2b2dd24SShri Abhyankar      case 3:
328c0c7eb62SShri Abhyankar 	B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct;
3290c4413a7SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct_v2;
330b2b2dd24SShri Abhyankar         break;
331b2b2dd24SShri Abhyankar      case 4:
332c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct;
33378bb4007SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct_v2;
334b2b2dd24SShri Abhyankar         break;
335b2b2dd24SShri Abhyankar      case 5:
336c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct;
33778bb4007SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct_v2;
338b2b2dd24SShri Abhyankar         break;
339b2b2dd24SShri Abhyankar      case 6:
340c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct;
3416506fda5SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct_v2;
342b2b2dd24SShri Abhyankar         break;
343b2b2dd24SShri Abhyankar      case 7:
344c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct;
34535aa4fcfSShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct_v2;
346b2b2dd24SShri Abhyankar         break;
347b2b2dd24SShri Abhyankar      default:
348c0c7eb62SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct;
34935aa4fcfSShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct_v2;
350b2b2dd24SShri Abhyankar       }
351b2b2dd24SShri Abhyankar    }
352b2b2dd24SShri Abhyankar   /*  ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */
353b2b2dd24SShri Abhyankar   PetscFunctionReturn(0);
354b2b2dd24SShri Abhyankar  }
355b2b2dd24SShri Abhyankar 
356b2b2dd24SShri Abhyankar #undef __FUNCT__
357faca2338SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ"
358faca2338SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
359faca2338SShri Abhyankar {
360faca2338SShri Abhyankar   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
361faca2338SShri Abhyankar   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
362faca2338SShri Abhyankar   PetscTruth         row_identity,col_identity,both_identity;
363faca2338SShri Abhyankar   IS                 isicol;
364faca2338SShri Abhyankar   PetscErrorCode     ierr;
365faca2338SShri Abhyankar   const PetscInt     *r,*ic;
366faca2338SShri Abhyankar   PetscInt           i,*ai=a->i,*aj=a->j;
367faca2338SShri Abhyankar   PetscInt           *bi,*bj,*ajtmp;
368faca2338SShri Abhyankar   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
369faca2338SShri Abhyankar   PetscReal          f;
370faca2338SShri Abhyankar   PetscInt           nlnk,*lnk,k,**bi_ptr;
371faca2338SShri Abhyankar   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
372faca2338SShri Abhyankar   PetscBT            lnkbt;
373faca2338SShri Abhyankar   PetscTruth         newdatastruct=PETSC_FALSE;
374faca2338SShri Abhyankar 
375faca2338SShri Abhyankar   PetscFunctionBegin;
376faca2338SShri Abhyankar   ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr);
377faca2338SShri Abhyankar   if(newdatastruct){
378faca2338SShri Abhyankar     ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr);
379faca2338SShri Abhyankar     PetscFunctionReturn(0);
380faca2338SShri Abhyankar   }
381faca2338SShri Abhyankar 
382faca2338SShri Abhyankar   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
383faca2338SShri Abhyankar   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
384faca2338SShri Abhyankar   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
385faca2338SShri Abhyankar   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
386faca2338SShri Abhyankar 
387bc74c81fSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
388bc74c81fSJed Brown   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
389bc74c81fSJed Brown   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
390bc74c81fSJed Brown 
391a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
392c7272abeSHong Zhang 
393c7272abeSHong Zhang   /* linked list for storing column indices of the active row */
394c7272abeSHong Zhang   nlnk = n + 1;
395c7272abeSHong Zhang   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
396c7272abeSHong Zhang 
397c7272abeSHong Zhang   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
398c7272abeSHong Zhang 
399c7272abeSHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
400c7272abeSHong Zhang   f = info->fill;
401c7272abeSHong Zhang   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
402c7272abeSHong Zhang   current_space = free_space;
40383287d42SBarry Smith 
40483287d42SBarry Smith   for (i=0; i<n; i++) {
405c7272abeSHong Zhang     /* copy previous fill into linked list */
40683287d42SBarry Smith     nzi = 0;
407c7272abeSHong Zhang     nnz = ai[r[i]+1] - ai[r[i]];
408c7272abeSHong Zhang     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
409c7272abeSHong Zhang     ajtmp = aj + ai[r[i]];
410c7272abeSHong Zhang     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
411c7272abeSHong Zhang     nzi += nlnk;
412c7272abeSHong Zhang 
413c7272abeSHong Zhang     /* add pivot rows into linked list */
414c7272abeSHong Zhang     row = lnk[n];
415c7272abeSHong Zhang     while (row < i) {
416c7272abeSHong Zhang       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
417c7272abeSHong Zhang       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
418c7272abeSHong Zhang       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
419c7272abeSHong Zhang       nzi += nlnk;
420c7272abeSHong Zhang       row  = lnk[row];
42183287d42SBarry Smith     }
422c7272abeSHong Zhang     bi[i+1] = bi[i] + nzi;
423c7272abeSHong Zhang     im[i]   = nzi;
424c7272abeSHong Zhang 
425c7272abeSHong Zhang     /* mark bdiag */
426c7272abeSHong Zhang     nzbd = 0;
427c7272abeSHong Zhang     nnz  = nzi;
428c7272abeSHong Zhang     k    = lnk[n];
429c7272abeSHong Zhang     while (nnz-- && k < i){
430c7272abeSHong Zhang       nzbd++;
431c7272abeSHong Zhang       k = lnk[k];
432c7272abeSHong Zhang     }
433c7272abeSHong Zhang     bdiag[i] = bi[i] + nzbd;
434c7272abeSHong Zhang 
435c7272abeSHong Zhang     /* if free space is not available, make more free space */
436c7272abeSHong Zhang     if (current_space->local_remaining<nzi) {
437c7272abeSHong Zhang       nnz = (n - i)*nzi; /* estimated and max additional space needed */
438c7272abeSHong Zhang       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
439c7272abeSHong Zhang       reallocs++;
44083287d42SBarry Smith     }
44183287d42SBarry Smith 
442c7272abeSHong Zhang     /* copy data into free space, then initialize lnk */
443c7272abeSHong Zhang     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
444c7272abeSHong Zhang     bi_ptr[i] = current_space->array;
445c7272abeSHong Zhang     current_space->array           += nzi;
446c7272abeSHong Zhang     current_space->local_used      += nzi;
447c7272abeSHong Zhang     current_space->local_remaining -= nzi;
448c7272abeSHong Zhang   }
4496cf91177SBarry Smith #if defined(PETSC_USE_INFO)
45083287d42SBarry Smith   if (ai[n] != 0) {
451c7272abeSHong Zhang     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
452ae15b995SBarry Smith     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
453ae15b995SBarry Smith     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
454ae15b995SBarry Smith     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
455ae15b995SBarry Smith     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
45683287d42SBarry Smith   } else {
457c7272abeSHong Zhang     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
45883287d42SBarry Smith   }
45963ba0a88SBarry Smith #endif
46083287d42SBarry Smith 
46183287d42SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
46283287d42SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
46383287d42SBarry Smith 
464c7272abeSHong Zhang   /* destroy list of free space and other temporary array(s) */
465c7272abeSHong Zhang   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
466c7272abeSHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
467c7272abeSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
468c7272abeSHong Zhang   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
46983287d42SBarry Smith 
47083287d42SBarry Smith   /* put together the new matrix */
471719d5645SBarry Smith   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
472719d5645SBarry Smith   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
473719d5645SBarry Smith   b    = (Mat_SeqBAIJ*)(B)->data;
474e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
475e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
476c7272abeSHong Zhang   b->singlemalloc = PETSC_FALSE;
477c7272abeSHong Zhang   ierr          = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
478c7272abeSHong Zhang   b->j          = bj;
479c7272abeSHong Zhang   b->i          = bi;
480c7272abeSHong Zhang   b->diag       = bdiag;
4817f53bb6cSHong Zhang   b->free_diag  = PETSC_TRUE;
48283287d42SBarry Smith   b->ilen       = 0;
48383287d42SBarry Smith   b->imax       = 0;
48483287d42SBarry Smith   b->row        = isrow;
48583287d42SBarry Smith   b->col        = iscol;
486d3d32019SBarry Smith   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
48783287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
48883287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
48983287d42SBarry Smith   b->icol       = isicol;
49087828ca2SBarry Smith   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
491c7272abeSHong Zhang   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
49283287d42SBarry Smith 
493c7272abeSHong Zhang   b->maxnz = b->nz = bi[n] ;
494c7272abeSHong Zhang   (B)->factor                =  MAT_FACTOR_LU;
495719d5645SBarry Smith   (B)->info.factor_mallocs   = reallocs;
496719d5645SBarry Smith   (B)->info.fill_ratio_given = f;
497c7272abeSHong Zhang 
49883287d42SBarry Smith   if (ai[n] != 0) {
499c7272abeSHong Zhang     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
50083287d42SBarry Smith   } else {
501719d5645SBarry Smith     (B)->info.fill_ratio_needed = 0.0;
50283287d42SBarry Smith   }
503c7272abeSHong Zhang 
504db4efbfdSBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
505db4efbfdSBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
5067d18ce8fSMatthew Knepley   both_identity = (PetscTruth) (row_identity && col_identity);
50741df41f0SMatthew Knepley   ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr);
50883287d42SBarry Smith   PetscFunctionReturn(0);
50983287d42SBarry Smith  }
510db4efbfdSBarry Smith 
511