xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision 5f70456ab6f3cd2e59d3fbe1a6b3b262faade6ac)
1be1d678aSKris Buschelman 
283287d42SBarry Smith /*
383287d42SBarry Smith     Factorization code for BAIJ format.
483287d42SBarry Smith */
5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
6af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
783287d42SBarry Smith 
8db4efbfdSBarry Smith /*
9db4efbfdSBarry Smith    This is used to set the numeric factorization for both LU and ILU symbolic factorization
10db4efbfdSBarry Smith */
11ace3abfcSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
12ae3d28f0SHong Zhang {
13ae3d28f0SHong Zhang   PetscFunctionBegin;
146929473cSShri Abhyankar   if (natural) {
156929473cSShri Abhyankar     switch (fact->rmap->bs) {
16048b5e81SShri Abhyankar     case 1:
17048b5e81SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
18048b5e81SShri Abhyankar       break;
196929473cSShri Abhyankar     case 2:
204dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
216929473cSShri Abhyankar       break;
226929473cSShri Abhyankar     case 3:
234dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
246929473cSShri Abhyankar       break;
256929473cSShri Abhyankar     case 4:
264dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
276929473cSShri Abhyankar       break;
286929473cSShri Abhyankar     case 5:
294dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
306929473cSShri Abhyankar       break;
316929473cSShri Abhyankar     case 6:
324dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
336929473cSShri Abhyankar       break;
346929473cSShri Abhyankar     case 7:
354dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
366929473cSShri Abhyankar       break;
3796e086a2SDaniel Kokron     case 9:
38*5f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3996e086a2SDaniel Kokron       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
40aee5c371SBarry Smith #else
41aee5c371SBarry Smith       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
42aee5c371SBarry Smith #endif
4396e086a2SDaniel Kokron       break;
4429a97285SShri Abhyankar     case 15:
4529a97285SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
4629a97285SShri Abhyankar       break;
476929473cSShri Abhyankar     default:
484dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
496929473cSShri Abhyankar       break;
506929473cSShri Abhyankar     }
516ba06ab7SHong Zhang   } else {
52ae3d28f0SHong Zhang     switch (fact->rmap->bs) {
53048b5e81SShri Abhyankar     case 1:
54048b5e81SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
55048b5e81SShri Abhyankar       break;
56ae3d28f0SHong Zhang     case 2:
574dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
58ae3d28f0SHong Zhang       break;
59ae3d28f0SHong Zhang     case 3:
604dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
61ae3d28f0SHong Zhang       break;
62ae3d28f0SHong Zhang     case 4:
634dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
64ae3d28f0SHong Zhang       break;
65ae3d28f0SHong Zhang     case 5:
664dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
67ae3d28f0SHong Zhang       break;
68ae3d28f0SHong Zhang     case 6:
694dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
70ae3d28f0SHong Zhang       break;
71ae3d28f0SHong Zhang     case 7:
724dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
73ae3d28f0SHong Zhang       break;
74ae3d28f0SHong Zhang     default:
754dd39f65SShri Abhyankar       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
76ae3d28f0SHong Zhang       break;
77ae3d28f0SHong Zhang     }
786929473cSShri Abhyankar   }
79ae3d28f0SHong Zhang   PetscFunctionReturn(0);
80ae3d28f0SHong Zhang }
81ae3d28f0SHong Zhang 
82ace3abfcSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
83db4efbfdSBarry Smith {
84db4efbfdSBarry Smith   PetscFunctionBegin;
85db4efbfdSBarry Smith   if (natural) {
86db4efbfdSBarry Smith     switch (inA->rmap->bs) {
87db4efbfdSBarry Smith     case 1:
8806e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
89db4efbfdSBarry Smith       break;
90db4efbfdSBarry Smith     case 2:
9106e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
92db4efbfdSBarry Smith       break;
93db4efbfdSBarry Smith     case 3:
9406e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
95db4efbfdSBarry Smith       break;
96db4efbfdSBarry Smith     case 4:
97ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
98db4efbfdSBarry Smith       {
99ace3abfcSBarry Smith         PetscBool      sse_enabled_local;
100db4efbfdSBarry Smith         PetscErrorCode ierr;
1010298fd71SBarry Smith         ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);CHKERRQ(ierr);
102db4efbfdSBarry Smith         if (sse_enabled_local) {
103db4efbfdSBarry Smith #  if defined(PETSC_HAVE_SSE)
104db4efbfdSBarry Smith           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
105db4efbfdSBarry Smith           if (n==(unsigned short)n) {
106db4efbfdSBarry Smith             unsigned short *aj=(unsigned short*)AJ;
10726fbe8dcSKarl Rupp             for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];
10826fbe8dcSKarl Rupp 
109db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
110db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
11126fbe8dcSKarl Rupp 
112db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr);
113db4efbfdSBarry Smith           } else {
114db4efbfdSBarry Smith             /* Scale the column indices for easier indexing in MatSolve. */
115db4efbfdSBarry Smith /*            for (i=0;i<nz;i++) { */
116db4efbfdSBarry Smith /*              AJ[i] = AJ[i]*4; */
117db4efbfdSBarry Smith /*            } */
118db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
119db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
12026fbe8dcSKarl Rupp 
121db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr);
122db4efbfdSBarry Smith           }
123db4efbfdSBarry Smith #  else
124db4efbfdSBarry Smith           /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
125e32f2f54SBarry Smith           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
126db4efbfdSBarry Smith #  endif
127db4efbfdSBarry Smith         } else {
12806e38f1dSHong Zhang           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
129db4efbfdSBarry Smith         }
130db4efbfdSBarry Smith       }
131db4efbfdSBarry Smith #else
13206e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
133db4efbfdSBarry Smith #endif
134db4efbfdSBarry Smith       break;
135db4efbfdSBarry Smith     case 5:
13606e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
137db4efbfdSBarry Smith       break;
138db4efbfdSBarry Smith     case 6:
13906e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
140db4efbfdSBarry Smith       break;
141db4efbfdSBarry Smith     case 7:
14206e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
143db4efbfdSBarry Smith       break;
144db4efbfdSBarry Smith     default:
14506e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
146db4efbfdSBarry Smith       break;
147db4efbfdSBarry Smith     }
148db4efbfdSBarry Smith   } else {
149db4efbfdSBarry Smith     switch (inA->rmap->bs) {
150db4efbfdSBarry Smith     case 1:
15106e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
152db4efbfdSBarry Smith       break;
153db4efbfdSBarry Smith     case 2:
15406e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
155db4efbfdSBarry Smith       break;
156db4efbfdSBarry Smith     case 3:
15706e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
158db4efbfdSBarry Smith       break;
159db4efbfdSBarry Smith     case 4:
16006e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
161db4efbfdSBarry Smith       break;
162db4efbfdSBarry Smith     case 5:
16306e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
164db4efbfdSBarry Smith       break;
165db4efbfdSBarry Smith     case 6:
16606e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
167db4efbfdSBarry Smith       break;
168db4efbfdSBarry Smith     case 7:
16906e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
170db4efbfdSBarry Smith       break;
171db4efbfdSBarry Smith     default:
17206e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
173db4efbfdSBarry Smith       break;
174db4efbfdSBarry Smith     }
175db4efbfdSBarry Smith   }
176db4efbfdSBarry Smith   PetscFunctionReturn(0);
177db4efbfdSBarry Smith }
178db4efbfdSBarry Smith 
17983287d42SBarry Smith /*
18083287d42SBarry Smith     The symbolic factorization code is identical to that for AIJ format,
18183287d42SBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
18283287d42SBarry Smith   NOT good code reuse.
18383287d42SBarry Smith */
184c6db04a5SJed Brown #include <petscbt.h>
185c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
186c7272abeSHong Zhang 
1874dd39f65SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
18883287d42SBarry Smith {
18983287d42SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
190c7272abeSHong Zhang   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
191ace3abfcSBarry Smith   PetscBool          row_identity,col_identity,both_identity;
19283287d42SBarry Smith   IS                 isicol;
1936849ba73SBarry Smith   PetscErrorCode     ierr;
194c7272abeSHong Zhang   const PetscInt     *r,*ic;
195c7272abeSHong Zhang   PetscInt           i,*ai=a->i,*aj=a->j;
196c7272abeSHong Zhang   PetscInt           *bi,*bj,*ajtmp;
197c7272abeSHong Zhang   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
198c7272abeSHong Zhang   PetscReal          f;
199c7272abeSHong Zhang   PetscInt           nlnk,*lnk,k,**bi_ptr;
2000298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
201c7272abeSHong Zhang   PetscBT            lnkbt;
202ece542f9SHong Zhang   PetscBool          missing;
20383287d42SBarry Smith 
20483287d42SBarry Smith   PetscFunctionBegin;
205e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
206ece542f9SHong Zhang   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
207ece542f9SHong Zhang   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
208ece542f9SHong Zhang 
2096ba06ab7SHong Zhang   if (bs>1) {  /* check shifttype */
210f23aa3ddSBarry Smith     if (info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
2116ba06ab7SHong Zhang   }
2126ba06ab7SHong Zhang 
21383287d42SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
21483287d42SBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
21583287d42SBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
21683287d42SBarry Smith 
217bc74c81fSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
218854ce69bSBarry Smith   ierr  = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
219854ce69bSBarry Smith   ierr  = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
220a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
221b2b2dd24SShri Abhyankar 
222b2b2dd24SShri Abhyankar   /* linked list for storing column indices of the active row */
223b2b2dd24SShri Abhyankar   nlnk = n + 1;
224b2b2dd24SShri Abhyankar   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
225b2b2dd24SShri Abhyankar 
226dcca6d9dSJed Brown   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
227b2b2dd24SShri Abhyankar 
228b2b2dd24SShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
229b2b2dd24SShri Abhyankar   f    = info->fill;
230f91af8c7SBarry Smith   ierr = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
23126fbe8dcSKarl Rupp 
232b2b2dd24SShri Abhyankar   current_space = free_space;
233b2b2dd24SShri Abhyankar 
234b2b2dd24SShri Abhyankar   for (i=0; i<n; i++) {
235b2b2dd24SShri Abhyankar     /* copy previous fill into linked list */
236b2b2dd24SShri Abhyankar     nzi = 0;
237b2b2dd24SShri Abhyankar     nnz = ai[r[i]+1] - ai[r[i]];
238b2b2dd24SShri Abhyankar     ajtmp = aj + ai[r[i]];
239b2b2dd24SShri Abhyankar     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
240b2b2dd24SShri Abhyankar     nzi  += nlnk;
241b2b2dd24SShri Abhyankar 
242b2b2dd24SShri Abhyankar     /* add pivot rows into linked list */
243b2b2dd24SShri Abhyankar     row = lnk[n];
244b2b2dd24SShri Abhyankar     while (row < i) {
245b2b2dd24SShri Abhyankar       nzbd  = bdiag[row] + 1;   /* num of entries in the row with column index <= row */
246b2b2dd24SShri Abhyankar       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
247b2b2dd24SShri Abhyankar       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
248b2b2dd24SShri Abhyankar       nzi  += nlnk;
249b2b2dd24SShri Abhyankar       row   = lnk[row];
250b2b2dd24SShri Abhyankar     }
251b2b2dd24SShri Abhyankar     bi[i+1] = bi[i] + nzi;
252b2b2dd24SShri Abhyankar     im[i]   = nzi;
253b2b2dd24SShri Abhyankar 
254b2b2dd24SShri Abhyankar     /* mark bdiag */
255b2b2dd24SShri Abhyankar     nzbd = 0;
256b2b2dd24SShri Abhyankar     nnz  = nzi;
257b2b2dd24SShri Abhyankar     k    = lnk[n];
258b2b2dd24SShri Abhyankar     while (nnz-- && k < i) {
259b2b2dd24SShri Abhyankar       nzbd++;
260b2b2dd24SShri Abhyankar       k = lnk[k];
261b2b2dd24SShri Abhyankar     }
2622ce24eb6SHong Zhang     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
263b2b2dd24SShri Abhyankar 
264b2b2dd24SShri Abhyankar     /* if free space is not available, make more free space */
265b2b2dd24SShri Abhyankar     if (current_space->local_remaining<nzi) {
266f91af8c7SBarry Smith       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */
267b2b2dd24SShri Abhyankar       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
268b2b2dd24SShri Abhyankar       reallocs++;
269b2b2dd24SShri Abhyankar     }
270b2b2dd24SShri Abhyankar 
271b2b2dd24SShri Abhyankar     /* copy data into free space, then initialize lnk */
272b2b2dd24SShri Abhyankar     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
27326fbe8dcSKarl Rupp 
274b2b2dd24SShri Abhyankar     bi_ptr[i]                       = current_space->array;
275b2b2dd24SShri Abhyankar     current_space->array           += nzi;
276b2b2dd24SShri Abhyankar     current_space->local_used      += nzi;
277b2b2dd24SShri Abhyankar     current_space->local_remaining -= nzi;
278b2b2dd24SShri Abhyankar   }
279b2b2dd24SShri Abhyankar 
280b2b2dd24SShri Abhyankar   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
281b2b2dd24SShri Abhyankar   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
282b2b2dd24SShri Abhyankar 
2839263d837SHong Zhang   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
284854ce69bSBarry Smith   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
2852ce24eb6SHong Zhang   ierr = PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);CHKERRQ(ierr);
286b2b2dd24SShri Abhyankar   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
287b2b2dd24SShri Abhyankar   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
288b2b2dd24SShri Abhyankar 
289b2b2dd24SShri Abhyankar   /* put together the new matrix */
290367daffbSBarry Smith   ierr = MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
2913bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
292b2b2dd24SShri Abhyankar   b    = (Mat_SeqBAIJ*)(B)->data;
29326fbe8dcSKarl Rupp 
294b2b2dd24SShri Abhyankar   b->free_a       = PETSC_TRUE;
295b2b2dd24SShri Abhyankar   b->free_ij      = PETSC_TRUE;
296b2b2dd24SShri Abhyankar   b->singlemalloc = PETSC_FALSE;
29726fbe8dcSKarl Rupp 
298854ce69bSBarry Smith   ierr             = PetscMalloc1((bdiag[0]+1)*bs2,&b->a);CHKERRQ(ierr);
299b2b2dd24SShri Abhyankar   b->j             = bj;
300b2b2dd24SShri Abhyankar   b->i             = bi;
301b2b2dd24SShri Abhyankar   b->diag          = bdiag;
302b2b2dd24SShri Abhyankar   b->free_diag     = PETSC_TRUE;
303b2b2dd24SShri Abhyankar   b->ilen          = 0;
304b2b2dd24SShri Abhyankar   b->imax          = 0;
305b2b2dd24SShri Abhyankar   b->row           = isrow;
306b2b2dd24SShri Abhyankar   b->col           = iscol;
307b2b2dd24SShri Abhyankar   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
30826fbe8dcSKarl Rupp 
309b2b2dd24SShri Abhyankar   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
310b2b2dd24SShri Abhyankar   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
311b2b2dd24SShri Abhyankar   b->icol = isicol;
312854ce69bSBarry Smith   ierr    = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr);
3133bb1ff40SBarry Smith   ierr    = PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
314b2b2dd24SShri Abhyankar 
315b2b2dd24SShri Abhyankar   b->maxnz = b->nz = bdiag[0]+1;
31626fbe8dcSKarl Rupp 
317d5f3da31SBarry Smith   B->factortype            =  MAT_FACTOR_LU;
318b2b2dd24SShri Abhyankar   B->info.factor_mallocs   = reallocs;
319b2b2dd24SShri Abhyankar   B->info.fill_ratio_given = f;
320b2b2dd24SShri Abhyankar 
321b2b2dd24SShri Abhyankar   if (ai[n] != 0) {
322b2b2dd24SShri Abhyankar     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
323b2b2dd24SShri Abhyankar   } else {
324b2b2dd24SShri Abhyankar     B->info.fill_ratio_needed = 0.0;
325b2b2dd24SShri Abhyankar   }
3269263d837SHong Zhang #if defined(PETSC_USE_INFO)
3279263d837SHong Zhang   if (ai[n] != 0) {
3289263d837SHong Zhang     PetscReal af = B->info.fill_ratio_needed;
32957622a8eSBarry Smith     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
33057622a8eSBarry Smith     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
33157622a8eSBarry Smith     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
3329263d837SHong Zhang     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
3339263d837SHong Zhang   } else {
3349263d837SHong Zhang     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
3359263d837SHong Zhang   }
3369263d837SHong Zhang #endif
337b2b2dd24SShri Abhyankar 
338b2b2dd24SShri Abhyankar   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
339b2b2dd24SShri Abhyankar   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
34026fbe8dcSKarl Rupp 
341ace3abfcSBarry Smith   both_identity = (PetscBool) (row_identity && col_identity);
34226fbe8dcSKarl Rupp 
3434dd39f65SShri Abhyankar   ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr);
344b2b2dd24SShri Abhyankar   PetscFunctionReturn(0);
345b2b2dd24SShri Abhyankar }
346b2b2dd24SShri Abhyankar 
34706e38f1dSHong Zhang PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
348faca2338SShri Abhyankar {
349faca2338SShri Abhyankar   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
350faca2338SShri Abhyankar   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
351ace3abfcSBarry Smith   PetscBool          row_identity,col_identity,both_identity;
352faca2338SShri Abhyankar   IS                 isicol;
353faca2338SShri Abhyankar   PetscErrorCode     ierr;
354faca2338SShri Abhyankar   const PetscInt     *r,*ic;
355faca2338SShri Abhyankar   PetscInt           i,*ai=a->i,*aj=a->j;
356faca2338SShri Abhyankar   PetscInt           *bi,*bj,*ajtmp;
357faca2338SShri Abhyankar   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
358faca2338SShri Abhyankar   PetscReal          f;
359faca2338SShri Abhyankar   PetscInt           nlnk,*lnk,k,**bi_ptr;
3600298fd71SBarry Smith   PetscFreeSpaceList free_space=NULL,current_space=NULL;
361faca2338SShri Abhyankar   PetscBT            lnkbt;
362ece542f9SHong Zhang   PetscBool          missing;
363faca2338SShri Abhyankar 
364faca2338SShri Abhyankar   PetscFunctionBegin;
365e32f2f54SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
366ece542f9SHong Zhang   ierr = MatMissingDiagonal(A,&missing,&i);CHKERRQ(ierr);
367ece542f9SHong Zhang   if (missing) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %D",i);
368ece542f9SHong Zhang 
369faca2338SShri Abhyankar   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
370faca2338SShri Abhyankar   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
371faca2338SShri Abhyankar   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
372faca2338SShri Abhyankar 
373bc74c81fSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
374854ce69bSBarry Smith   ierr = PetscMalloc1(n+1,&bi);CHKERRQ(ierr);
375854ce69bSBarry Smith   ierr = PetscMalloc1(n+1,&bdiag);CHKERRQ(ierr);
376bc74c81fSJed Brown 
377a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
378c7272abeSHong Zhang 
379c7272abeSHong Zhang   /* linked list for storing column indices of the active row */
380c7272abeSHong Zhang   nlnk = n + 1;
381c7272abeSHong Zhang   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
382c7272abeSHong Zhang 
383dcca6d9dSJed Brown   ierr = PetscMalloc2(n+1,&bi_ptr,n+1,&im);CHKERRQ(ierr);
384c7272abeSHong Zhang 
385c7272abeSHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
386c7272abeSHong Zhang   f             = info->fill;
387f91af8c7SBarry Smith   ierr          = PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);CHKERRQ(ierr);
388c7272abeSHong Zhang   current_space = free_space;
38983287d42SBarry Smith 
39083287d42SBarry Smith   for (i=0; i<n; i++) {
391c7272abeSHong Zhang     /* copy previous fill into linked list */
39283287d42SBarry Smith     nzi = 0;
393c7272abeSHong Zhang     nnz = ai[r[i]+1] - ai[r[i]];
394c7272abeSHong Zhang     ajtmp = aj + ai[r[i]];
395c7272abeSHong Zhang     ierr  = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
396c7272abeSHong Zhang     nzi  += nlnk;
397c7272abeSHong Zhang 
398c7272abeSHong Zhang     /* add pivot rows into linked list */
399c7272abeSHong Zhang     row = lnk[n];
400c7272abeSHong Zhang     while (row < i) {
401c7272abeSHong Zhang       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
402c7272abeSHong Zhang       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
403c7272abeSHong Zhang       ierr  = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
404c7272abeSHong Zhang       nzi  += nlnk;
405c7272abeSHong Zhang       row   = lnk[row];
40683287d42SBarry Smith     }
407c7272abeSHong Zhang     bi[i+1] = bi[i] + nzi;
408c7272abeSHong Zhang     im[i]   = nzi;
409c7272abeSHong Zhang 
410c7272abeSHong Zhang     /* mark bdiag */
411c7272abeSHong Zhang     nzbd = 0;
412c7272abeSHong Zhang     nnz  = nzi;
413c7272abeSHong Zhang     k    = lnk[n];
414c7272abeSHong Zhang     while (nnz-- && k < i) {
415c7272abeSHong Zhang       nzbd++;
416c7272abeSHong Zhang       k = lnk[k];
417c7272abeSHong Zhang     }
418c7272abeSHong Zhang     bdiag[i] = bi[i] + nzbd;
419c7272abeSHong Zhang 
420c7272abeSHong Zhang     /* if free space is not available, make more free space */
421c7272abeSHong Zhang     if (current_space->local_remaining<nzi) {
422f91af8c7SBarry Smith       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
423c7272abeSHong Zhang       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
424c7272abeSHong Zhang       reallocs++;
42583287d42SBarry Smith     }
42683287d42SBarry Smith 
427c7272abeSHong Zhang     /* copy data into free space, then initialize lnk */
428c7272abeSHong Zhang     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
42926fbe8dcSKarl Rupp 
430c7272abeSHong Zhang     bi_ptr[i]                       = current_space->array;
431c7272abeSHong Zhang     current_space->array           += nzi;
432c7272abeSHong Zhang     current_space->local_used      += nzi;
433c7272abeSHong Zhang     current_space->local_remaining -= nzi;
434c7272abeSHong Zhang   }
4356cf91177SBarry Smith #if defined(PETSC_USE_INFO)
43683287d42SBarry Smith   if (ai[n] != 0) {
437c7272abeSHong Zhang     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
43857622a8eSBarry Smith     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);CHKERRQ(ierr);
43957622a8eSBarry Smith     ierr = PetscInfo1(A,"Run with -pc_factor_fill %g or use \n",(double)af);CHKERRQ(ierr);
44057622a8eSBarry Smith     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%g);\n",(double)af);CHKERRQ(ierr);
441ae15b995SBarry Smith     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
44283287d42SBarry Smith   } else {
443c7272abeSHong Zhang     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
44483287d42SBarry Smith   }
44563ba0a88SBarry Smith #endif
44683287d42SBarry Smith 
44783287d42SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
44883287d42SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
44983287d42SBarry Smith 
450c7272abeSHong Zhang   /* destroy list of free space and other temporary array(s) */
451854ce69bSBarry Smith   ierr = PetscMalloc1(bi[n]+1,&bj);CHKERRQ(ierr);
452c7272abeSHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
453c7272abeSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
454c7272abeSHong Zhang   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
45583287d42SBarry Smith 
45683287d42SBarry Smith   /* put together the new matrix */
457367daffbSBarry Smith   ierr = MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);CHKERRQ(ierr);
4583bb1ff40SBarry Smith   ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);CHKERRQ(ierr);
459719d5645SBarry Smith   b    = (Mat_SeqBAIJ*)(B)->data;
46026fbe8dcSKarl Rupp 
461e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
462e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
463c7272abeSHong Zhang   b->singlemalloc = PETSC_FALSE;
46426fbe8dcSKarl Rupp 
465854ce69bSBarry Smith   ierr             = PetscMalloc1((bi[n]+1)*bs2,&b->a);CHKERRQ(ierr);
466c7272abeSHong Zhang   b->j             = bj;
467c7272abeSHong Zhang   b->i             = bi;
468c7272abeSHong Zhang   b->diag          = bdiag;
4697f53bb6cSHong Zhang   b->free_diag     = PETSC_TRUE;
47083287d42SBarry Smith   b->ilen          = 0;
47183287d42SBarry Smith   b->imax          = 0;
47283287d42SBarry Smith   b->row           = isrow;
47383287d42SBarry Smith   b->col           = iscol;
474d3d32019SBarry Smith   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
47526fbe8dcSKarl Rupp 
47683287d42SBarry Smith   ierr    = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
47783287d42SBarry Smith   ierr    = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
47883287d42SBarry Smith   b->icol = isicol;
47926fbe8dcSKarl Rupp 
480854ce69bSBarry Smith   ierr = PetscMalloc1(bs*n+bs,&b->solve_work);CHKERRQ(ierr);
4813bb1ff40SBarry Smith   ierr = PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
48283287d42SBarry Smith 
483c7272abeSHong Zhang   b->maxnz = b->nz = bi[n];
48426fbe8dcSKarl Rupp 
485d5f3da31SBarry Smith   (B)->factortype            =  MAT_FACTOR_LU;
486719d5645SBarry Smith   (B)->info.factor_mallocs   = reallocs;
487719d5645SBarry Smith   (B)->info.fill_ratio_given = f;
488c7272abeSHong Zhang 
48983287d42SBarry Smith   if (ai[n] != 0) {
490c7272abeSHong Zhang     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
49183287d42SBarry Smith   } else {
492719d5645SBarry Smith     (B)->info.fill_ratio_needed = 0.0;
49383287d42SBarry Smith   }
494c7272abeSHong Zhang 
495db4efbfdSBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
496db4efbfdSBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
49726fbe8dcSKarl Rupp 
498ace3abfcSBarry Smith   both_identity = (PetscBool) (row_identity && col_identity);
49926fbe8dcSKarl Rupp 
5008b1456e3SHong Zhang   ierr = MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);CHKERRQ(ierr);
50183287d42SBarry Smith   PetscFunctionReturn(0);
50283287d42SBarry Smith }
503db4efbfdSBarry Smith 
504