xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision eeffb40d691afbdd57a8091619e7ddd44ac5fdca)
1 #define PETSCMAT_DLL
2 
3 /*
4     Factorization code for BAIJ format.
5 */
6 #include "../src/mat/impls/baij/seq/baij.h"
7 #include "../src/mat/blockinvert.h"
8 
9 #undef __FUNCT__
10 #define __FUNCT__ "MatSeqBAIJSetNumericFactorization"
11 /*
12    This is used to set the numeric factorization for both LU and ILU symbolic factorization
13 */
14 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural)
15 {
16   PetscFunctionBegin;
17   if (natural) {
18     switch (inA->rmap->bs) {
19     case 1:
20       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21       break;
22     case 2:
23       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
24       break;
25     case 3:
26       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
27       break;
28     case 4:
29 #if defined(PETSC_USE_MAT_SINGLE)
30       {
31         PetscTruth  sse_enabled_local;
32         PetscErrorCode ierr;
33         ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr);
34         if (sse_enabled_local) {
35 #  if defined(PETSC_HAVE_SSE)
36           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
37           if (n==(unsigned short)n) {
38             unsigned short *aj=(unsigned short *)AJ;
39             for (i=0;i<nz;i++) {
40               aj[i] = (unsigned short)AJ[i];
41             }
42             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
43             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
44             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr);
45           } else {
46         /* Scale the column indices for easier indexing in MatSolve. */
47 /*            for (i=0;i<nz;i++) { */
48 /*              AJ[i] = AJ[i]*4; */
49 /*            } */
50             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
51             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
52             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr);
53           }
54 #  else
55         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
56           SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable");
57 #  endif
58         } else {
59           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
60         }
61       }
62 #else
63       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
64 #endif
65       break;
66     case 5:
67       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
68       break;
69     case 6:
70       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
71       break;
72     case 7:
73       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
74       break;
75     default:
76       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
77       break;
78     }
79   } else {
80     switch (inA->rmap->bs) {
81     case 1:
82       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
83       break;
84     case 2:
85       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
86       break;
87     case 3:
88       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
89       break;
90     case 4:
91       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
92       break;
93     case 5:
94       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
95       break;
96     case 6:
97       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
98       break;
99     case 7:
100       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
101       break;
102     default:
103       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
104       break;
105     }
106   }
107   PetscFunctionReturn(0);
108 }
109 
110 /*
111     The symbolic factorization code is identical to that for AIJ format,
112   except for very small changes since this is now a SeqBAIJ datastructure.
113   NOT good code reuse.
114 */
115 #include "petscbt.h"
116 #include "../src/mat/utils/freespace.h"
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct"
120 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
121 {
122   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
123   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
124   PetscTruth         row_identity,col_identity,both_identity;
125   IS                 isicol;
126   PetscErrorCode     ierr;
127   const PetscInt     *r,*ic;
128   PetscInt           i,*ai=a->i,*aj=a->j;
129   PetscInt           *bi,*bj,*ajtmp;
130   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
131   PetscReal          f;
132   PetscInt           nlnk,*lnk,k,**bi_ptr;
133   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
134   PetscBT            lnkbt;
135 
136   PetscFunctionBegin;
137   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
138   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
139   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
140   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
141 
142   /* get new row pointers */
143   ierr = PetscMalloc((2*n+2)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
144   bi[0] = 0;
145 
146   /* bdiag is location of diagonal in factor */
147   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
148   bdiag[0] = 0;
149 
150   /* linked list for storing column indices of the active row */
151   nlnk = n + 1;
152   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
153 
154   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
155 
156   /* initial FreeSpace size is f*(ai[n]+1) */
157   f = info->fill;
158   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
159   current_space = free_space;
160 
161   for (i=0; i<n; i++) {
162     /* copy previous fill into linked list */
163     nzi = 0;
164     nnz = ai[r[i]+1] - ai[r[i]];
165     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
166     ajtmp = aj + ai[r[i]];
167     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
168     nzi += nlnk;
169 
170     /* add pivot rows into linked list */
171     row = lnk[n];
172     while (row < i) {
173       nzbd    = bdiag[row] + 1; /* num of entries in the row with column index <= row */
174       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
175       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
176       nzi += nlnk;
177       row  = lnk[row];
178     }
179     bi[i+1] = bi[i] + nzi;
180     im[i]   = nzi;
181 
182     /* mark bdiag */
183     nzbd = 0;
184     nnz  = nzi;
185     k    = lnk[n];
186     while (nnz-- && k < i){
187       nzbd++;
188       k = lnk[k];
189     }
190     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
191 
192     /* if free space is not available, make more free space */
193     if (current_space->local_remaining<nzi) {
194       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
195       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
196       reallocs++;
197     }
198 
199     /* copy data into free space, then initialize lnk */
200     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
201     bi_ptr[i] = current_space->array;
202     current_space->array           += nzi;
203     current_space->local_used      += nzi;
204     current_space->local_remaining -= nzi;
205   }
206 #if defined(PETSC_USE_INFO)
207   if (ai[n] != 0) {
208     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
209     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
210     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
211     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
212     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
213   } else {
214     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
215   }
216 #endif
217 
218   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
219   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
220 
221   /* destroy list of free space and other temporary array(s) */
222   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
223   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
224   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
225   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
226 
227   /* put together the new matrix */
228   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
229   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
230   b    = (Mat_SeqBAIJ*)(B)->data;
231   b->free_a       = PETSC_TRUE;
232   b->free_ij      = PETSC_TRUE;
233   b->singlemalloc = PETSC_FALSE;
234   ierr          = PetscMalloc((bi[2*n+1])*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
235   b->j          = bj;
236   b->i          = bi;
237   b->diag       = bdiag;
238   b->free_diag  = PETSC_TRUE;
239   b->ilen       = 0;
240   b->imax       = 0;
241   b->row        = isrow;
242   b->col        = iscol;
243   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
244   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
245   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
246   b->icol       = isicol;
247   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
248   ierr = PetscLogObjectMemory(B,(bi[2*n+1])*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
249 
250   b->maxnz = b->nz = bi[2*n+1];
251   B->factor                =  MAT_FACTOR_LU;
252   B->info.factor_mallocs   = reallocs;
253   B->info.fill_ratio_given = f;
254 
255   if (ai[n] != 0) {
256     B->info.fill_ratio_needed = ((PetscReal)bi[2*n+1])/((PetscReal)ai[n]);
257   } else {
258     B->info.fill_ratio_needed = 0.0;
259   }
260 
261   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
262   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
263   both_identity = (PetscTruth) (row_identity && col_identity);
264   if(both_identity){
265      switch(bs){
266      case 2:
267         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct;
268         B->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct;
269         break;
270      case 3:
271 	B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct;
272         B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct;
273         break;
274      case 4:
275         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct;
276         B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct;
277         break;
278      case 5:
279         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct;
280         B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct;
281         break;
282      case 6:
283         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct;
284         B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct;
285         break;
286      case 7:
287         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct;
288         B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct;
289         break;
290      default:
291         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct;
292         B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct;
293       }
294    }
295    else {
296      switch(bs){
297      case 2:
298         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct;
299         B->ops->solve           = MatSolve_SeqBAIJ_2_newdatastruct;
300         break;
301      case 3:
302 	B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct;
303         B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct;
304         break;
305      case 4:
306         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct;
307         B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct;
308         break;
309      case 5:
310         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct;
311         B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct;
312         break;
313      case 6:
314         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct;
315         B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct;
316         break;
317      case 7:
318         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct;
319         B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct;
320         break;
321      default:
322         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct;
323         B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct;
324       }
325    }
326   /*  ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */
327   PetscFunctionReturn(0);
328  }
329 
330 #undef __FUNCT__
331 #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2"
332 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
333 {
334   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
335   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
336   PetscTruth         row_identity,col_identity,both_identity;
337   IS                 isicol;
338   PetscErrorCode     ierr;
339   const PetscInt     *r,*ic;
340   PetscInt           i,*ai=a->i,*aj=a->j;
341   PetscInt           *bi,*bj,*ajtmp;
342   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
343   PetscReal          f;
344   PetscInt           nlnk,*lnk,k,**bi_ptr;
345   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
346   PetscBT            lnkbt;
347 
348   PetscFunctionBegin;
349   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
350   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
351   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
352   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
353 
354   /* get new row pointers */
355   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
356   bi[0] = 0;
357 
358   /* bdiag is location of diagonal in factor */
359   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
360   bdiag[0] = 0;
361 
362   /* linked list for storing column indices of the active row */
363   nlnk = n + 1;
364   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
365 
366   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
367 
368   /* initial FreeSpace size is f*(ai[n]+1) */
369   f = info->fill;
370   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
371   current_space = free_space;
372 
373   for (i=0; i<n; i++) {
374     /* copy previous fill into linked list */
375     nzi = 0;
376     nnz = ai[r[i]+1] - ai[r[i]];
377     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
378     ajtmp = aj + ai[r[i]];
379     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
380     nzi += nlnk;
381 
382     /* add pivot rows into linked list */
383     row = lnk[n];
384     while (row < i) {
385       nzbd    = bdiag[row] + 1; /* num of entries in the row with column index <= row */
386       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
387       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
388       nzi += nlnk;
389       row  = lnk[row];
390     }
391     bi[i+1] = bi[i] + nzi;
392     im[i]   = nzi;
393 
394     /* mark bdiag */
395     nzbd = 0;
396     nnz  = nzi;
397     k    = lnk[n];
398     while (nnz-- && k < i){
399       nzbd++;
400       k = lnk[k];
401     }
402     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU_v2() */
403 
404     /* if free space is not available, make more free space */
405     if (current_space->local_remaining<nzi) {
406       nnz = 2*(n - i)*nzi; /* estimated and max additional space needed */
407       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
408       reallocs++;
409     }
410 
411     /* copy data into free space, then initialize lnk */
412     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
413     bi_ptr[i] = current_space->array;
414     current_space->array           += nzi;
415     current_space->local_used      += nzi;
416     current_space->local_remaining -= nzi;
417   }
418 #if defined(PETSC_USE_INFO)
419   if (ai[n] != 0) {
420     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
421     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
422     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
423     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
424     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
425   } else {
426     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
427   }
428 #endif
429 
430   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
431   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
432 
433   /* destroy list of free space and other temporary array(s) */
434   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
435   ierr = PetscFreeSpaceContiguous_LU_v2(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
436   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
437   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
438 
439   /* put together the new matrix */
440   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
441   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
442   b    = (Mat_SeqBAIJ*)(B)->data;
443   b->free_a       = PETSC_TRUE;
444   b->free_ij      = PETSC_TRUE;
445   b->singlemalloc = PETSC_FALSE;
446   ierr          = PetscMalloc((bdiag[0]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
447   b->j          = bj;
448   b->i          = bi;
449   b->diag       = bdiag;
450   b->free_diag  = PETSC_TRUE;
451   b->ilen       = 0;
452   b->imax       = 0;
453   b->row        = isrow;
454   b->col        = iscol;
455   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
456   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
457   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
458   b->icol       = isicol;
459   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
460   ierr = PetscLogObjectMemory(B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
461 
462   b->maxnz = b->nz = bdiag[0]+1;
463   B->factor                =  MAT_FACTOR_LU;
464   B->info.factor_mallocs   = reallocs;
465   B->info.fill_ratio_given = f;
466 
467   if (ai[n] != 0) {
468     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
469   } else {
470     B->info.fill_ratio_needed = 0.0;
471   }
472 
473   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
474   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
475   both_identity = (PetscTruth) (row_identity && col_identity);
476   if(both_identity){
477      switch(bs){
478      case 2:
479         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2;
480         B->ops->solve           = MatSolve_SeqBAIJ_2_NaturalOrdering_newdatastruct_v2;
481         break;
482      case 3:
483 	B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2;
484         B->ops->solve = MatSolve_SeqBAIJ_3_NaturalOrdering_newdatastruct_v2;
485         break;
486      case 4:
487         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2;
488         B->ops->solve = MatSolve_SeqBAIJ_4_NaturalOrdering_newdatastruct_v2;
489         break;
490      case 5:
491         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_newdatastruct_v2;
492         B->ops->solve = MatSolve_SeqBAIJ_5_NaturalOrdering_newdatastruct_v2;
493         break;
494      case 6:
495         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_newdatastruct_v2;
496         B->ops->solve = MatSolve_SeqBAIJ_6_NaturalOrdering_newdatastruct_v2;
497         break;
498      case 7:
499         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_newdatastruct_v2;
500         B->ops->solve = MatSolve_SeqBAIJ_7_NaturalOrdering_newdatastruct_v2;
501         break;
502      default:
503         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct_v2;
504         B->ops->solve = MatSolve_SeqBAIJ_N_NaturalOrdering_newdatastruct_v2;
505       }
506    }
507    else {
508      switch(bs){
509      case 2:
510         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_newdatastruct_v2;
511         B->ops->solve           = MatSolve_SeqBAIJ_2_newdatastruct_v2;
512         break;
513      case 3:
514 	B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_newdatastruct_v2;
515         B->ops->solve = MatSolve_SeqBAIJ_3_newdatastruct_v2;
516         break;
517      case 4:
518         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_newdatastruct_v2;
519         B->ops->solve = MatSolve_SeqBAIJ_4_newdatastruct_v2;
520         break;
521      case 5:
522         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_newdatastruct_v2;
523         B->ops->solve = MatSolve_SeqBAIJ_5_newdatastruct_v2;
524         break;
525      case 6:
526         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_newdatastruct_v2;
527         B->ops->solve = MatSolve_SeqBAIJ_6_newdatastruct_v2;
528         break;
529      case 7:
530         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_newdatastruct;
531         B->ops->solve = MatSolve_SeqBAIJ_7_newdatastruct;
532         break;
533      default:
534         B->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_newdatastruct;
535         B->ops->solve = MatSolve_SeqBAIJ_N_newdatastruct;
536       }
537    }
538   /*  ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr); */
539   PetscFunctionReturn(0);
540  }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ"
544 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
545 {
546   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
547   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
548   PetscTruth         row_identity,col_identity,both_identity;
549   IS                 isicol;
550   PetscErrorCode     ierr;
551   const PetscInt     *r,*ic;
552   PetscInt           i,*ai=a->i,*aj=a->j;
553   PetscInt           *bi,*bj,*ajtmp;
554   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
555   PetscReal          f;
556   PetscInt           nlnk,*lnk,k,**bi_ptr;
557   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
558   PetscBT            lnkbt;
559   PetscTruth         newdatastruct=PETSC_FALSE;
560   PetscTruth         newdatastruct_v2=PETSC_FALSE;
561 
562   PetscFunctionBegin;
563   ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new",&newdatastruct,PETSC_NULL);CHKERRQ(ierr);
564   if(newdatastruct){
565     ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct(B,A,isrow,iscol,info);CHKERRQ(ierr);
566     PetscFunctionReturn(0);
567   }
568 
569   ierr = PetscOptionsGetTruth(PETSC_NULL,"-lu_new_v2",&newdatastruct_v2,PETSC_NULL);CHKERRQ(ierr);
570   if(newdatastruct_v2){
571     ierr = MatLUFactorSymbolic_SeqBAIJ_newdatastruct_v2(B,A,isrow,iscol,info);CHKERRQ(ierr);
572     PetscFunctionReturn(0);
573   }
574 
575   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
576   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
577   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
578   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
579 
580   /* get new row pointers */
581   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
582   bi[0] = 0;
583 
584   /* bdiag is location of diagonal in factor */
585   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
586   bdiag[0] = 0;
587 
588   /* linked list for storing column indices of the active row */
589   nlnk = n + 1;
590   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
591 
592   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
593 
594   /* initial FreeSpace size is f*(ai[n]+1) */
595   f = info->fill;
596   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
597   current_space = free_space;
598 
599   for (i=0; i<n; i++) {
600     /* copy previous fill into linked list */
601     nzi = 0;
602     nnz = ai[r[i]+1] - ai[r[i]];
603     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
604     ajtmp = aj + ai[r[i]];
605     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
606     nzi += nlnk;
607 
608     /* add pivot rows into linked list */
609     row = lnk[n];
610     while (row < i) {
611       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
612       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
613       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
614       nzi += nlnk;
615       row  = lnk[row];
616     }
617     bi[i+1] = bi[i] + nzi;
618     im[i]   = nzi;
619 
620     /* mark bdiag */
621     nzbd = 0;
622     nnz  = nzi;
623     k    = lnk[n];
624     while (nnz-- && k < i){
625       nzbd++;
626       k = lnk[k];
627     }
628     bdiag[i] = bi[i] + nzbd;
629 
630     /* if free space is not available, make more free space */
631     if (current_space->local_remaining<nzi) {
632       nnz = (n - i)*nzi; /* estimated and max additional space needed */
633       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
634       reallocs++;
635     }
636 
637     /* copy data into free space, then initialize lnk */
638     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
639     bi_ptr[i] = current_space->array;
640     current_space->array           += nzi;
641     current_space->local_used      += nzi;
642     current_space->local_remaining -= nzi;
643   }
644 #if defined(PETSC_USE_INFO)
645   if (ai[n] != 0) {
646     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
647     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
648     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
649     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
650     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
651   } else {
652     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
653   }
654 #endif
655 
656   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
657   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
658 
659   /* destroy list of free space and other temporary array(s) */
660   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
661   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
662   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
663   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
664 
665   /* put together the new matrix */
666   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
667   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
668   b    = (Mat_SeqBAIJ*)(B)->data;
669   b->free_a       = PETSC_TRUE;
670   b->free_ij      = PETSC_TRUE;
671   b->singlemalloc = PETSC_FALSE;
672   ierr          = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
673   b->j          = bj;
674   b->i          = bi;
675   b->diag       = bdiag;
676   b->free_diag  = PETSC_TRUE;
677   b->ilen       = 0;
678   b->imax       = 0;
679   b->row        = isrow;
680   b->col        = iscol;
681   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
682   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
683   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
684   b->icol       = isicol;
685   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
686   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
687 
688   b->maxnz = b->nz = bi[n] ;
689   (B)->factor                =  MAT_FACTOR_LU;
690   (B)->info.factor_mallocs   = reallocs;
691   (B)->info.fill_ratio_given = f;
692 
693   if (ai[n] != 0) {
694     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
695   } else {
696     (B)->info.fill_ratio_needed = 0.0;
697   }
698 
699   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
700   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
701   both_identity = (PetscTruth) (row_identity && col_identity);
702   ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr);
703   PetscFunctionReturn(0);
704  }
705 
706