xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision b2b2dd246975d7f9c8a1571def503d28e659d8b1)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
383287d42SBarry Smith /*
483287d42SBarry Smith     Factorization code for BAIJ format.
583287d42SBarry Smith */
67c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
7c60f0209SBarry Smith #include "../src/mat/blockinvert.h"
883287d42SBarry Smith 
9db4efbfdSBarry Smith #undef __FUNCT__
10db4efbfdSBarry Smith #define __FUNCT__ "MatSeqBAIJSetNumericFactorization"
11db4efbfdSBarry Smith /*
12db4efbfdSBarry Smith    This is used to set the numeric factorization for both LU and ILU symbolic factorization
13db4efbfdSBarry Smith */
14db4efbfdSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural)
15db4efbfdSBarry Smith {
16db4efbfdSBarry Smith   PetscFunctionBegin;
17db4efbfdSBarry Smith   if (natural) {
18db4efbfdSBarry Smith     switch (inA->rmap->bs) {
19db4efbfdSBarry Smith     case 1:
20db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21db4efbfdSBarry Smith       break;
22db4efbfdSBarry Smith     case 2:
23db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
24db4efbfdSBarry Smith       break;
25db4efbfdSBarry Smith     case 3:
26db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
27db4efbfdSBarry Smith       break;
28db4efbfdSBarry Smith     case 4:
29db4efbfdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
30db4efbfdSBarry Smith       {
31db4efbfdSBarry Smith         PetscTruth  sse_enabled_local;
32db4efbfdSBarry Smith         PetscErrorCode ierr;
33db4efbfdSBarry Smith         ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr);
34db4efbfdSBarry Smith         if (sse_enabled_local) {
35db4efbfdSBarry Smith #  if defined(PETSC_HAVE_SSE)
36db4efbfdSBarry Smith           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
37db4efbfdSBarry Smith           if (n==(unsigned short)n) {
38db4efbfdSBarry Smith             unsigned short *aj=(unsigned short *)AJ;
39db4efbfdSBarry Smith             for (i=0;i<nz;i++) {
40db4efbfdSBarry Smith               aj[i] = (unsigned short)AJ[i];
41db4efbfdSBarry Smith             }
42db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
43db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
44db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr);
45db4efbfdSBarry Smith           } else {
46db4efbfdSBarry Smith         /* Scale the column indices for easier indexing in MatSolve. */
47db4efbfdSBarry Smith /*            for (i=0;i<nz;i++) { */
48db4efbfdSBarry Smith /*              AJ[i] = AJ[i]*4; */
49db4efbfdSBarry Smith /*            } */
50db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
51db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
52db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr);
53db4efbfdSBarry Smith           }
54db4efbfdSBarry Smith #  else
55db4efbfdSBarry Smith         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
56db4efbfdSBarry Smith           SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable");
57db4efbfdSBarry Smith #  endif
58db4efbfdSBarry Smith         } else {
59db4efbfdSBarry Smith           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
60db4efbfdSBarry Smith         }
61db4efbfdSBarry Smith       }
62db4efbfdSBarry Smith #else
63db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
64db4efbfdSBarry Smith #endif
65db4efbfdSBarry Smith       break;
66db4efbfdSBarry Smith     case 5:
67db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
68db4efbfdSBarry Smith       break;
69db4efbfdSBarry Smith     case 6:
70db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
71db4efbfdSBarry Smith       break;
72db4efbfdSBarry Smith     case 7:
73db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
74db4efbfdSBarry Smith       break;
75db4efbfdSBarry Smith     default:
76db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
77db4efbfdSBarry Smith       break;
78db4efbfdSBarry Smith     }
79db4efbfdSBarry Smith   } else {
80db4efbfdSBarry Smith     switch (inA->rmap->bs) {
81db4efbfdSBarry Smith     case 1:
82db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
83db4efbfdSBarry Smith       break;
84db4efbfdSBarry Smith     case 2:
85db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
86db4efbfdSBarry Smith       break;
87db4efbfdSBarry Smith     case 3:
88db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
89db4efbfdSBarry Smith       break;
90db4efbfdSBarry Smith     case 4:
91db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
92db4efbfdSBarry Smith       break;
93db4efbfdSBarry Smith     case 5:
94db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
95db4efbfdSBarry Smith       break;
96db4efbfdSBarry Smith     case 6:
97db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
98db4efbfdSBarry Smith       break;
99db4efbfdSBarry Smith     case 7:
100db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
101db4efbfdSBarry Smith       break;
102db4efbfdSBarry Smith     default:
103db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
104db4efbfdSBarry Smith       break;
105db4efbfdSBarry Smith     }
106db4efbfdSBarry Smith   }
107db4efbfdSBarry Smith   PetscFunctionReturn(0);
108db4efbfdSBarry Smith }
109db4efbfdSBarry Smith 
11083287d42SBarry Smith /*
11183287d42SBarry Smith     The symbolic factorization code is identical to that for AIJ format,
11283287d42SBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
11383287d42SBarry Smith   NOT good code reuse.
11483287d42SBarry Smith */
115c7272abeSHong Zhang #include "petscbt.h"
116c7272abeSHong Zhang #include "../src/mat/utils/freespace.h"
117c7272abeSHong Zhang 
1184a2ae208SSatish Balay #undef __FUNCT__
119faca2338SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct"
120faca2338SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
12183287d42SBarry Smith {
12283287d42SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
123c7272abeSHong Zhang   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
124c7272abeSHong Zhang   PetscTruth         row_identity,col_identity,both_identity;
12583287d42SBarry Smith   IS                 isicol;
1266849ba73SBarry Smith   PetscErrorCode     ierr;
127c7272abeSHong Zhang   const PetscInt     *r,*ic;
128c7272abeSHong Zhang   PetscInt           i,*ai=a->i,*aj=a->j;
129c7272abeSHong Zhang   PetscInt           *bi,*bj,*ajtmp;
130c7272abeSHong Zhang   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
131c7272abeSHong Zhang   PetscReal          f;
132c7272abeSHong Zhang   PetscInt           nlnk,*lnk,k,**bi_ptr;
133c7272abeSHong Zhang   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
134c7272abeSHong Zhang   PetscBT            lnkbt;
13583287d42SBarry Smith 
13683287d42SBarry Smith   PetscFunctionBegin;
137d0f46423SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
13883287d42SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
13983287d42SBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
14083287d42SBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
14183287d42SBarry Smith 
14283287d42SBarry Smith   /* get new row pointers */
143faca2338SShri Abhyankar   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
144faca2338SShri Abhyankar   bi[0] = 0;
145faca2338SShri Abhyankar 
146faca2338SShri Abhyankar   /* bdiag is location of diagonal in factor */
147faca2338SShri Abhyankar   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
148faca2338SShri Abhyankar   bdiag[0] = 0;
149faca2338SShri Abhyankar 
150faca2338SShri Abhyankar   /* linked list for storing column indices of the active row */
151faca2338SShri Abhyankar   nlnk = n + 1;
152faca2338SShri Abhyankar   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
153faca2338SShri Abhyankar 
154faca2338SShri Abhyankar   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
155faca2338SShri Abhyankar 
156faca2338SShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
157faca2338SShri Abhyankar   f = info->fill;
158faca2338SShri Abhyankar   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
159faca2338SShri Abhyankar   current_space = free_space;
160faca2338SShri Abhyankar 
161faca2338SShri Abhyankar   for (i=0; i<n; i++) {
162faca2338SShri Abhyankar     /* copy previous fill into linked list */
163faca2338SShri Abhyankar     nzi = 0;
164faca2338SShri Abhyankar     nnz = ai[r[i]+1] - ai[r[i]];
165faca2338SShri Abhyankar     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
166faca2338SShri Abhyankar     ajtmp = aj + ai[r[i]];
167faca2338SShri Abhyankar     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
168faca2338SShri Abhyankar     nzi += nlnk;
169faca2338SShri Abhyankar 
170faca2338SShri Abhyankar     /* add pivot rows into linked list */
171faca2338SShri Abhyankar     row = lnk[n];
172faca2338SShri Abhyankar     while (row < i) {
1738bfb2e2bSShri Abhyankar       nzbd    = bdiag[row] + 1; /* num of entries in the row with column index <= row */
174faca2338SShri Abhyankar       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
175faca2338SShri Abhyankar       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
176faca2338SShri Abhyankar       nzi += nlnk;
177faca2338SShri Abhyankar       row  = lnk[row];
178faca2338SShri Abhyankar     }
179faca2338SShri Abhyankar     bi[i+1] = bi[i] + nzi;
180faca2338SShri Abhyankar     im[i]   = nzi;
181faca2338SShri Abhyankar 
182faca2338SShri Abhyankar     /* mark bdiag */
183faca2338SShri Abhyankar     nzbd = 0;
184faca2338SShri Abhyankar     nnz  = nzi;
185faca2338SShri Abhyankar     k    = lnk[n];
186faca2338SShri Abhyankar     while (nnz-- && k < i){
187faca2338SShri Abhyankar       nzbd++;
188faca2338SShri Abhyankar       k = lnk[k];
189faca2338SShri Abhyankar     }
190783ef271SHong Zhang     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
191faca2338SShri Abhyankar 
192faca2338SShri Abhyankar     /* if free space is not available, make more free space */
193faca2338SShri Abhyankar     if (current_space->local_remaining<nzi) {
194faca2338SShri Abhyankar       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
195faca2338SShri Abhyankar       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
196faca2338SShri Abhyankar       reallocs++;
197faca2338SShri Abhyankar     }
198faca2338SShri Abhyankar 
199faca2338SShri Abhyankar     /* copy data into free space, then initialize lnk */
200faca2338SShri Abhyankar     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
201faca2338SShri Abhyankar     bi_ptr[i] = current_space->array;
202faca2338SShri Abhyankar     current_space->array           += nzi;
203faca2338SShri Abhyankar     current_space->local_used      += nzi;
204faca2338SShri Abhyankar     current_space->local_remaining -= nzi;
205faca2338SShri Abhyankar   }
206faca2338SShri Abhyankar #if defined(PETSC_USE_INFO)
207faca2338SShri Abhyankar   if (ai[n] != 0) {
208faca2338SShri Abhyankar     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
209faca2338SShri Abhyankar     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
210faca2338SShri Abhyankar     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
211faca2338SShri Abhyankar     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
212faca2338SShri Abhyankar     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
213faca2338SShri Abhyankar   } else {
214faca2338SShri Abhyankar     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
215faca2338SShri Abhyankar   }
216faca2338SShri Abhyankar #endif
217faca2338SShri Abhyankar 
218faca2338SShri Abhyankar   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
219faca2338SShri Abhyankar   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
220faca2338SShri Abhyankar 
221faca2338SShri Abhyankar   /* destroy list of free space and other temporary array(s) */
222faca2338SShri Abhyankar   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
223783ef271SHong Zhang   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
224faca2338SShri Abhyankar   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
225faca2338SShri Abhyankar   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
226faca2338SShri Abhyankar 
227faca2338SShri Abhyankar   /* put together the new matrix */
228faca2338SShri Abhyankar   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
229faca2338SShri Abhyankar   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
230faca2338SShri Abhyankar   b    = (Mat_SeqBAIJ*)(B)->data;
231faca2338SShri Abhyankar   b->free_a       = PETSC_TRUE;
232faca2338SShri Abhyankar   b->free_ij      = PETSC_TRUE;
233faca2338SShri Abhyankar   b->singlemalloc = PETSC_FALSE;
234faca2338SShri Abhyankar   ierr          = PetscMalloc((bi[2*n+1])*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
235faca2338SShri Abhyankar   b->j          = bj;
236faca2338SShri Abhyankar   b->i          = bi;
237faca2338SShri Abhyankar   b->diag       = bdiag;
2387f53bb6cSHong Zhang   b->free_diag  = PETSC_TRUE;
239faca2338SShri Abhyankar   b->ilen       = 0;
240faca2338SShri Abhyankar   b->imax       = 0;
241faca2338SShri Abhyankar   b->row        = isrow;
242faca2338SShri Abhyankar   b->col        = iscol;
243faca2338SShri Abhyankar   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
244faca2338SShri Abhyankar   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
245faca2338SShri Abhyankar   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
246faca2338SShri Abhyankar   b->icol       = isicol;
247faca2338SShri Abhyankar   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
248faca2338SShri Abhyankar   ierr = PetscLogObjectMemory(B,(bi[2*n+1])*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
249faca2338SShri Abhyankar 
250faca2338SShri Abhyankar   b->maxnz = b->nz = bi[2*n+1];
2514c000e2eSHong Zhang   B->factor                =  MAT_FACTOR_LU;
2524c000e2eSHong Zhang   B->info.factor_mallocs   = reallocs;
2534c000e2eSHong Zhang   B->info.fill_ratio_given = f;
254faca2338SShri Abhyankar 
255faca2338SShri Abhyankar   if (ai[n] != 0) {
2564c000e2eSHong Zhang     B->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]);
257faca2338SShri Abhyankar   } else {
2584c000e2eSHong Zhang     B->info.fill_ratio_needed = 0.0;
259faca2338SShri Abhyankar   }
260b588c5a2SHong Zhang 
261faca2338SShri Abhyankar   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
262faca2338SShri Abhyankar   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
263faca2338SShri Abhyankar   both_identity = (PetscTruth) (row_identity && col_identity);
2648bfb2e2bSShri Abhyankar   if(both_identity){
2658bfb2e2bSShri Abhyankar      switch(bs){
2668bfb2e2bSShri Abhyankar      case 2:
2674c000e2eSHong Zhang         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct;
2684c000e2eSHong Zhang         B->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct;
2698bfb2e2bSShri Abhyankar         break;
2708bfb2e2bSShri Abhyankar      case 3:
271359bf53fSShri Abhyankar 	B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct;
2724c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct;
2738bfb2e2bSShri Abhyankar         break;
2748bfb2e2bSShri Abhyankar      case 4:
275209027a4SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct;
2764c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct;
2778bfb2e2bSShri Abhyankar         break;
2788bfb2e2bSShri Abhyankar      case 5:
2790deeaf61SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct;
2804c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct;
2818bfb2e2bSShri Abhyankar         break;
2828bfb2e2bSShri Abhyankar      case 6:
283bef36659SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct;
2844c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct;
2858bfb2e2bSShri Abhyankar         break;
2868bfb2e2bSShri Abhyankar      case 7:
287bef36659SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct;
2884c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct;
2898bfb2e2bSShri Abhyankar         break;
2908bfb2e2bSShri Abhyankar      default:
291bef36659SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct;
2924c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct;
2938bfb2e2bSShri Abhyankar       }
2948bfb2e2bSShri Abhyankar    }
2958bfb2e2bSShri Abhyankar    else {
2968bfb2e2bSShri Abhyankar      switch(bs){
2978bfb2e2bSShri Abhyankar      case 2:
2984c000e2eSHong Zhang         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct;
2994c000e2eSHong Zhang         B->ops->solve           = MatSolve_SeqBAIJ_2_newdatastruct;
3008bfb2e2bSShri Abhyankar         break;
3018bfb2e2bSShri Abhyankar      case 3:
30217542729SShri Abhyankar 	B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct;
3034c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct;
3048bfb2e2bSShri Abhyankar         break;
3058bfb2e2bSShri Abhyankar      case 4:
306209027a4SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct;
3074c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct;
3088bfb2e2bSShri Abhyankar         break;
3098bfb2e2bSShri Abhyankar      case 5:
3100deeaf61SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct;
3114c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct;
3128bfb2e2bSShri Abhyankar         break;
3138bfb2e2bSShri Abhyankar      case 6:
314bef36659SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct;
3154c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct;
3168bfb2e2bSShri Abhyankar         break;
3178bfb2e2bSShri Abhyankar      case 7:
318bef36659SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct;
3194c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct;
3208bfb2e2bSShri Abhyankar         break;
3218bfb2e2bSShri Abhyankar      default:
322bef36659SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct;
3234c000e2eSHong Zhang         B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct;
3248bfb2e2bSShri Abhyankar       }
3258bfb2e2bSShri Abhyankar    }
326b588c5a2SHong Zhang   /*  ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */
327faca2338SShri Abhyankar   PetscFunctionReturn(0);
328faca2338SShri Abhyankar  }
329faca2338SShri Abhyankar 
330faca2338SShri Abhyankar #undef __FUNCT__
331*b2b2dd24SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2"
332*b2b2dd24SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
333*b2b2dd24SShri Abhyankar {
334*b2b2dd24SShri Abhyankar   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
335*b2b2dd24SShri Abhyankar   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
336*b2b2dd24SShri Abhyankar   PetscTruth         row_identity,col_identity,both_identity;
337*b2b2dd24SShri Abhyankar   IS                 isicol;
338*b2b2dd24SShri Abhyankar   PetscErrorCode     ierr;
339*b2b2dd24SShri Abhyankar   const PetscInt     *r,*ic;
340*b2b2dd24SShri Abhyankar   PetscInt           i,*ai=a->i,*aj=a->j;
341*b2b2dd24SShri Abhyankar   PetscInt           *bi,*bj,*ajtmp;
342*b2b2dd24SShri Abhyankar   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
343*b2b2dd24SShri Abhyankar   PetscReal          f;
344*b2b2dd24SShri Abhyankar   PetscInt           nlnk,*lnk,k,**bi_ptr;
345*b2b2dd24SShri Abhyankar   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
346*b2b2dd24SShri Abhyankar   PetscBT            lnkbt;
347*b2b2dd24SShri Abhyankar 
348*b2b2dd24SShri Abhyankar   PetscFunctionBegin;
349*b2b2dd24SShri Abhyankar   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
350*b2b2dd24SShri Abhyankar   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
351*b2b2dd24SShri Abhyankar   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
352*b2b2dd24SShri Abhyankar   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
353*b2b2dd24SShri Abhyankar 
354*b2b2dd24SShri Abhyankar   /* get new row pointers */
355*b2b2dd24SShri Abhyankar   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
356*b2b2dd24SShri Abhyankar   bi[0] = 0;
357*b2b2dd24SShri Abhyankar 
358*b2b2dd24SShri Abhyankar   /* bdiag is location of diagonal in factor */
359*b2b2dd24SShri Abhyankar   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
360*b2b2dd24SShri Abhyankar   bdiag[0] = 0;
361*b2b2dd24SShri Abhyankar 
362*b2b2dd24SShri Abhyankar   /* linked list for storing column indices of the active row */
363*b2b2dd24SShri Abhyankar   nlnk = n + 1;
364*b2b2dd24SShri Abhyankar   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
365*b2b2dd24SShri Abhyankar 
366*b2b2dd24SShri Abhyankar   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
367*b2b2dd24SShri Abhyankar 
368*b2b2dd24SShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
369*b2b2dd24SShri Abhyankar   f = info->fill;
370*b2b2dd24SShri Abhyankar   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
371*b2b2dd24SShri Abhyankar   current_space = free_space;
372*b2b2dd24SShri Abhyankar 
373*b2b2dd24SShri Abhyankar   for (i=0; i<n; i++) {
374*b2b2dd24SShri Abhyankar     /* copy previous fill into linked list */
375*b2b2dd24SShri Abhyankar     nzi = 0;
376*b2b2dd24SShri Abhyankar     nnz = ai[r[i]+1] - ai[r[i]];
377*b2b2dd24SShri Abhyankar     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
378*b2b2dd24SShri Abhyankar     ajtmp = aj + ai[r[i]];
379*b2b2dd24SShri Abhyankar     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
380*b2b2dd24SShri Abhyankar     nzi += nlnk;
381*b2b2dd24SShri Abhyankar 
382*b2b2dd24SShri Abhyankar     /* add pivot rows into linked list */
383*b2b2dd24SShri Abhyankar     row = lnk[n];
384*b2b2dd24SShri Abhyankar     while (row < i) {
385*b2b2dd24SShri Abhyankar       nzbd    = bdiag[row] + 1; /* num of entries in the row with column index <= row */
386*b2b2dd24SShri Abhyankar       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
387*b2b2dd24SShri Abhyankar       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
388*b2b2dd24SShri Abhyankar       nzi += nlnk;
389*b2b2dd24SShri Abhyankar       row  = lnk[row];
390*b2b2dd24SShri Abhyankar     }
391*b2b2dd24SShri Abhyankar     bi[i+1] = bi[i] + nzi;
392*b2b2dd24SShri Abhyankar     im[i]   = nzi;
393*b2b2dd24SShri Abhyankar 
394*b2b2dd24SShri Abhyankar     /* mark bdiag */
395*b2b2dd24SShri Abhyankar     nzbd = 0;
396*b2b2dd24SShri Abhyankar     nnz  = nzi;
397*b2b2dd24SShri Abhyankar     k    = lnk[n];
398*b2b2dd24SShri Abhyankar     while (nnz-- && k < i){
399*b2b2dd24SShri Abhyankar       nzbd++;
400*b2b2dd24SShri Abhyankar       k = lnk[k];
401*b2b2dd24SShri Abhyankar     }
402*b2b2dd24SShri Abhyankar     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU_v2() */
403*b2b2dd24SShri Abhyankar 
404*b2b2dd24SShri Abhyankar     /* if free space is not available, make more free space */
405*b2b2dd24SShri Abhyankar     if (current_space->local_remaining<nzi) {
406*b2b2dd24SShri Abhyankar       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
407*b2b2dd24SShri Abhyankar       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
408*b2b2dd24SShri Abhyankar       reallocs++;
409*b2b2dd24SShri Abhyankar     }
410*b2b2dd24SShri Abhyankar 
411*b2b2dd24SShri Abhyankar     /* copy data into free space, then initialize lnk */
412*b2b2dd24SShri Abhyankar     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
413*b2b2dd24SShri Abhyankar     bi_ptr[i] = current_space->array;
414*b2b2dd24SShri Abhyankar     current_space->array           += nzi;
415*b2b2dd24SShri Abhyankar     current_space->local_used      += nzi;
416*b2b2dd24SShri Abhyankar     current_space->local_remaining -= nzi;
417*b2b2dd24SShri Abhyankar   }
418*b2b2dd24SShri Abhyankar #if defined(PETSC_USE_INFO)
419*b2b2dd24SShri Abhyankar   if (ai[n] != 0) {
420*b2b2dd24SShri Abhyankar     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
421*b2b2dd24SShri Abhyankar     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
422*b2b2dd24SShri Abhyankar     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
423*b2b2dd24SShri Abhyankar     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
424*b2b2dd24SShri Abhyankar     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
425*b2b2dd24SShri Abhyankar   } else {
426*b2b2dd24SShri Abhyankar     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
427*b2b2dd24SShri Abhyankar   }
428*b2b2dd24SShri Abhyankar #endif
429*b2b2dd24SShri Abhyankar 
430*b2b2dd24SShri Abhyankar   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
431*b2b2dd24SShri Abhyankar   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
432*b2b2dd24SShri Abhyankar 
433*b2b2dd24SShri Abhyankar   /* destroy list of free space and other temporary array(s) */
434*b2b2dd24SShri Abhyankar   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
435*b2b2dd24SShri Abhyankar   ierr = PetscFreeSpaceContiguous_LU_v2(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
436*b2b2dd24SShri Abhyankar   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
437*b2b2dd24SShri Abhyankar   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
438*b2b2dd24SShri Abhyankar 
439*b2b2dd24SShri Abhyankar   /* put together the new matrix */
440*b2b2dd24SShri Abhyankar   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
441*b2b2dd24SShri Abhyankar   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
442*b2b2dd24SShri Abhyankar   b    = (Mat_SeqBAIJ*)(B)->data;
443*b2b2dd24SShri Abhyankar   b->free_a       = PETSC_TRUE;
444*b2b2dd24SShri Abhyankar   b->free_ij      = PETSC_TRUE;
445*b2b2dd24SShri Abhyankar   b->singlemalloc = PETSC_FALSE;
446*b2b2dd24SShri Abhyankar   ierr          = PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
447*b2b2dd24SShri Abhyankar   b->j          = bj;
448*b2b2dd24SShri Abhyankar   b->i          = bi;
449*b2b2dd24SShri Abhyankar   b->diag       = bdiag;
450*b2b2dd24SShri Abhyankar   b->free_diag  = PETSC_TRUE;
451*b2b2dd24SShri Abhyankar   b->ilen       = 0;
452*b2b2dd24SShri Abhyankar   b->imax       = 0;
453*b2b2dd24SShri Abhyankar   b->row        = isrow;
454*b2b2dd24SShri Abhyankar   b->col        = iscol;
455*b2b2dd24SShri Abhyankar   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
456*b2b2dd24SShri Abhyankar   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
457*b2b2dd24SShri Abhyankar   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
458*b2b2dd24SShri Abhyankar   b->icol       = isicol;
459*b2b2dd24SShri Abhyankar   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
460*b2b2dd24SShri Abhyankar   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
461*b2b2dd24SShri Abhyankar 
462*b2b2dd24SShri Abhyankar   b->maxnz = b->nz = bdiag[0]+1;
463*b2b2dd24SShri Abhyankar   B->factor                =  MAT_FACTOR_LU;
464*b2b2dd24SShri Abhyankar   B->info.factor_mallocs   = reallocs;
465*b2b2dd24SShri Abhyankar   B->info.fill_ratio_given = f;
466*b2b2dd24SShri Abhyankar 
467*b2b2dd24SShri Abhyankar   if (ai[n] != 0) {
468*b2b2dd24SShri Abhyankar     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
469*b2b2dd24SShri Abhyankar   } else {
470*b2b2dd24SShri Abhyankar     B->info.fill_ratio_needed = 0.0;
471*b2b2dd24SShri Abhyankar   }
472*b2b2dd24SShri Abhyankar 
473*b2b2dd24SShri Abhyankar   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
474*b2b2dd24SShri Abhyankar   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
475*b2b2dd24SShri Abhyankar   both_identity = (PetscTruth) (row_identity && col_identity);
476*b2b2dd24SShri Abhyankar   if(both_identity){
477*b2b2dd24SShri Abhyankar      switch(bs){
478*b2b2dd24SShri Abhyankar      case 2:
479*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2;
480*b2b2dd24SShri Abhyankar         B->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2;
481*b2b2dd24SShri Abhyankar         break;
482*b2b2dd24SShri Abhyankar      case 3:
483*b2b2dd24SShri Abhyankar 	B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2;
484*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2;
485*b2b2dd24SShri Abhyankar         break;
486*b2b2dd24SShri Abhyankar      case 4:
487*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2;
488*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2;
489*b2b2dd24SShri Abhyankar         break;
490*b2b2dd24SShri Abhyankar      case 5:
491*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct;
492*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct;
493*b2b2dd24SShri Abhyankar         break;
494*b2b2dd24SShri Abhyankar      case 6:
495*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct;
496*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct;
497*b2b2dd24SShri Abhyankar         break;
498*b2b2dd24SShri Abhyankar      case 7:
499*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct;
500*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct;
501*b2b2dd24SShri Abhyankar         break;
502*b2b2dd24SShri Abhyankar      default:
503*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct;
504*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct;
505*b2b2dd24SShri Abhyankar       }
506*b2b2dd24SShri Abhyankar    }
507*b2b2dd24SShri Abhyankar    else {
508*b2b2dd24SShri Abhyankar      switch(bs){
509*b2b2dd24SShri Abhyankar      case 2:
510*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct;
511*b2b2dd24SShri Abhyankar         B->ops->solve           = MatSolve_SeqBAIJ_2_newdatastruct;
512*b2b2dd24SShri Abhyankar         break;
513*b2b2dd24SShri Abhyankar      case 3:
514*b2b2dd24SShri Abhyankar 	B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct;
515*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct;
516*b2b2dd24SShri Abhyankar         break;
517*b2b2dd24SShri Abhyankar      case 4:
518*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct;
519*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct;
520*b2b2dd24SShri Abhyankar         break;
521*b2b2dd24SShri Abhyankar      case 5:
522*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct;
523*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct;
524*b2b2dd24SShri Abhyankar         break;
525*b2b2dd24SShri Abhyankar      case 6:
526*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct;
527*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct;
528*b2b2dd24SShri Abhyankar         break;
529*b2b2dd24SShri Abhyankar      case 7:
530*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct;
531*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct;
532*b2b2dd24SShri Abhyankar         break;
533*b2b2dd24SShri Abhyankar      default:
534*b2b2dd24SShri Abhyankar         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct;
535*b2b2dd24SShri Abhyankar         B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct;
536*b2b2dd24SShri Abhyankar       }
537*b2b2dd24SShri Abhyankar    }
538*b2b2dd24SShri Abhyankar   /*  ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */
539*b2b2dd24SShri Abhyankar   PetscFunctionReturn(0);
540*b2b2dd24SShri Abhyankar  }
541*b2b2dd24SShri Abhyankar 
542*b2b2dd24SShri Abhyankar #undef __FUNCT__
543faca2338SShri Abhyankar #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ"
544faca2338SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
545faca2338SShri Abhyankar {
546faca2338SShri Abhyankar   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
547faca2338SShri Abhyankar   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
548faca2338SShri Abhyankar   PetscTruth         row_identity,col_identity,both_identity;
549faca2338SShri Abhyankar   IS                 isicol;
550faca2338SShri Abhyankar   PetscErrorCode     ierr;
551faca2338SShri Abhyankar   const PetscInt     *r,*ic;
552faca2338SShri Abhyankar   PetscInt           i,*ai=a->i,*aj=a->j;
553faca2338SShri Abhyankar   PetscInt           *bi,*bj,*ajtmp;
554faca2338SShri Abhyankar   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
555faca2338SShri Abhyankar   PetscReal          f;
556faca2338SShri Abhyankar   PetscInt           nlnk,*lnk,k,**bi_ptr;
557faca2338SShri Abhyankar   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
558faca2338SShri Abhyankar   PetscBT            lnkbt;
559faca2338SShri Abhyankar   PetscTruth         newdatastruct=PETSC_FALSE;
560*b2b2dd24SShri Abhyankar   PetscTruth         newdatastruct_v2=PETSC_FALSE;
561faca2338SShri Abhyankar 
562faca2338SShri Abhyankar   PetscFunctionBegin;
563faca2338SShri Abhyankar   ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr);
564faca2338SShri Abhyankar   if(newdatastruct){
565faca2338SShri Abhyankar     ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr);
566faca2338SShri Abhyankar     PetscFunctionReturn(0);
567faca2338SShri Abhyankar   }
568faca2338SShri Abhyankar 
569*b2b2dd24SShri Abhyankar   ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new_v2",&newdatastruct_v2,PETSC_NULL);CHKERRQ(ierr);
570*b2b2dd24SShri Abhyankar   if(newdatastruct_v2){
571*b2b2dd24SShri Abhyankar     ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2(B,A,isrow,iscol,info);CHKERRQ(ierr);
572*b2b2dd24SShri Abhyankar     PetscFunctionReturn(0);
573*b2b2dd24SShri Abhyankar   }
574*b2b2dd24SShri Abhyankar 
575faca2338SShri Abhyankar   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
576faca2338SShri Abhyankar   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
577faca2338SShri Abhyankar   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
578faca2338SShri Abhyankar   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
579faca2338SShri Abhyankar 
580faca2338SShri Abhyankar   /* get new row pointers */
581c7272abeSHong Zhang   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
582c7272abeSHong Zhang   bi[0] = 0;
583c7272abeSHong Zhang 
584c7272abeSHong Zhang   /* bdiag is location of diagonal in factor */
585c7272abeSHong Zhang   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
586c7272abeSHong Zhang   bdiag[0] = 0;
587c7272abeSHong Zhang 
588c7272abeSHong Zhang   /* linked list for storing column indices of the active row */
589c7272abeSHong Zhang   nlnk = n + 1;
590c7272abeSHong Zhang   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
591c7272abeSHong Zhang 
592c7272abeSHong Zhang   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
593c7272abeSHong Zhang 
594c7272abeSHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
595c7272abeSHong Zhang   f = info->fill;
596c7272abeSHong Zhang   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
597c7272abeSHong Zhang   current_space = free_space;
59883287d42SBarry Smith 
59983287d42SBarry Smith   for (i=0; i<n; i++) {
600c7272abeSHong Zhang     /* copy previous fill into linked list */
60183287d42SBarry Smith     nzi = 0;
602c7272abeSHong Zhang     nnz = ai[r[i]+1] - ai[r[i]];
603c7272abeSHong Zhang     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
604c7272abeSHong Zhang     ajtmp = aj + ai[r[i]];
605c7272abeSHong Zhang     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
606c7272abeSHong Zhang     nzi += nlnk;
607c7272abeSHong Zhang 
608c7272abeSHong Zhang     /* add pivot rows into linked list */
609c7272abeSHong Zhang     row = lnk[n];
610c7272abeSHong Zhang     while (row < i) {
611c7272abeSHong Zhang       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
612c7272abeSHong Zhang       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
613c7272abeSHong Zhang       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
614c7272abeSHong Zhang       nzi += nlnk;
615c7272abeSHong Zhang       row  = lnk[row];
61683287d42SBarry Smith     }
617c7272abeSHong Zhang     bi[i+1] = bi[i] + nzi;
618c7272abeSHong Zhang     im[i]   = nzi;
619c7272abeSHong Zhang 
620c7272abeSHong Zhang     /* mark bdiag */
621c7272abeSHong Zhang     nzbd = 0;
622c7272abeSHong Zhang     nnz  = nzi;
623c7272abeSHong Zhang     k    = lnk[n];
624c7272abeSHong Zhang     while (nnz-- && k < i){
625c7272abeSHong Zhang       nzbd++;
626c7272abeSHong Zhang       k = lnk[k];
627c7272abeSHong Zhang     }
628c7272abeSHong Zhang     bdiag[i] = bi[i] + nzbd;
629c7272abeSHong Zhang 
630c7272abeSHong Zhang     /* if free space is not available, make more free space */
631c7272abeSHong Zhang     if (current_space->local_remaining<nzi) {
632c7272abeSHong Zhang       nnz = (n - i)*nzi; /* estimated and max additional space needed */
633c7272abeSHong Zhang       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
634c7272abeSHong Zhang       reallocs++;
63583287d42SBarry Smith     }
63683287d42SBarry Smith 
637c7272abeSHong Zhang     /* copy data into free space, then initialize lnk */
638c7272abeSHong Zhang     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
639c7272abeSHong Zhang     bi_ptr[i] = current_space->array;
640c7272abeSHong Zhang     current_space->array           += nzi;
641c7272abeSHong Zhang     current_space->local_used      += nzi;
642c7272abeSHong Zhang     current_space->local_remaining -= nzi;
643c7272abeSHong Zhang   }
6446cf91177SBarry Smith #if defined(PETSC_USE_INFO)
64583287d42SBarry Smith   if (ai[n] != 0) {
646c7272abeSHong Zhang     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
647ae15b995SBarry Smith     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
648ae15b995SBarry Smith     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
649ae15b995SBarry Smith     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
650ae15b995SBarry Smith     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
65183287d42SBarry Smith   } else {
652c7272abeSHong Zhang     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
65383287d42SBarry Smith   }
65463ba0a88SBarry Smith #endif
65583287d42SBarry Smith 
65683287d42SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
65783287d42SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
65883287d42SBarry Smith 
659c7272abeSHong Zhang   /* destroy list of free space and other temporary array(s) */
660c7272abeSHong Zhang   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
661c7272abeSHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
662c7272abeSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
663c7272abeSHong Zhang   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
66483287d42SBarry Smith 
66583287d42SBarry Smith   /* put together the new matrix */
666719d5645SBarry Smith   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
667719d5645SBarry Smith   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
668719d5645SBarry Smith   b    = (Mat_SeqBAIJ*)(B)->data;
669e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
670e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
671c7272abeSHong Zhang   b->singlemalloc = PETSC_FALSE;
672c7272abeSHong Zhang   ierr          = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
673c7272abeSHong Zhang   b->j          = bj;
674c7272abeSHong Zhang   b->i          = bi;
675c7272abeSHong Zhang   b->diag       = bdiag;
6767f53bb6cSHong Zhang   b->free_diag  = PETSC_TRUE;
67783287d42SBarry Smith   b->ilen       = 0;
67883287d42SBarry Smith   b->imax       = 0;
67983287d42SBarry Smith   b->row        = isrow;
68083287d42SBarry Smith   b->col        = iscol;
681d3d32019SBarry Smith   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
68283287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
68383287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
68483287d42SBarry Smith   b->icol       = isicol;
68587828ca2SBarry Smith   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
686c7272abeSHong Zhang   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
68783287d42SBarry Smith 
688c7272abeSHong Zhang   b->maxnz = b->nz = bi[n] ;
689c7272abeSHong Zhang   (B)->factor                =  MAT_FACTOR_LU;
690719d5645SBarry Smith   (B)->info.factor_mallocs   = reallocs;
691719d5645SBarry Smith   (B)->info.fill_ratio_given = f;
692c7272abeSHong Zhang 
69383287d42SBarry Smith   if (ai[n] != 0) {
694c7272abeSHong Zhang     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
69583287d42SBarry Smith   } else {
696719d5645SBarry Smith     (B)->info.fill_ratio_needed = 0.0;
69783287d42SBarry Smith   }
698c7272abeSHong Zhang 
699db4efbfdSBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
700db4efbfdSBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
7017d18ce8fSMatthew Knepley   both_identity = (PetscTruth) (row_identity && col_identity);
70241df41f0SMatthew Knepley   ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr);
70383287d42SBarry Smith   PetscFunctionReturn(0);
70483287d42SBarry Smith  }
705db4efbfdSBarry Smith 
706