xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision 57508ece14a6b1339c0bbf016ecd72f673a062b0)
183287d42SBarry Smith /*
283287d42SBarry Smith     Factorization code for BAIJ format.
383287d42SBarry Smith */
4c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h>
5af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h>
683287d42SBarry Smith 
7db4efbfdSBarry Smith /*
8db4efbfdSBarry Smith    This is used to set the numeric factorization for both LU and ILU symbolic factorization
9db4efbfdSBarry Smith */
10d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural)
11d71ae5a4SJacob Faibussowitsch {
12ae3d28f0SHong Zhang   PetscFunctionBegin;
136929473cSShri Abhyankar   if (natural) {
146929473cSShri Abhyankar     switch (fact->rmap->bs) {
15d71ae5a4SJacob Faibussowitsch     case 1:
16d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
17d71ae5a4SJacob Faibussowitsch       break;
18d71ae5a4SJacob Faibussowitsch     case 2:
19d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
20d71ae5a4SJacob Faibussowitsch       break;
21d71ae5a4SJacob Faibussowitsch     case 3:
22d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
23d71ae5a4SJacob Faibussowitsch       break;
24d71ae5a4SJacob Faibussowitsch     case 4:
25d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
26d71ae5a4SJacob Faibussowitsch       break;
27d71ae5a4SJacob Faibussowitsch     case 5:
28d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
29d71ae5a4SJacob Faibussowitsch       break;
30d71ae5a4SJacob Faibussowitsch     case 6:
31d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
32d71ae5a4SJacob Faibussowitsch       break;
33d71ae5a4SJacob Faibussowitsch     case 7:
34d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
35d71ae5a4SJacob Faibussowitsch       break;
3696e086a2SDaniel Kokron     case 9:
375f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
3896e086a2SDaniel Kokron       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
39aee5c371SBarry Smith #else
40aee5c371SBarry Smith       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
41aee5c371SBarry Smith #endif
4296e086a2SDaniel Kokron       break;
43d71ae5a4SJacob Faibussowitsch     case 15:
44d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
45d71ae5a4SJacob Faibussowitsch       break;
46d71ae5a4SJacob Faibussowitsch     default:
47d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
48d71ae5a4SJacob Faibussowitsch       break;
496929473cSShri Abhyankar     }
506ba06ab7SHong Zhang   } else {
51ae3d28f0SHong Zhang     switch (fact->rmap->bs) {
52d71ae5a4SJacob Faibussowitsch     case 1:
53d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
54d71ae5a4SJacob Faibussowitsch       break;
55d71ae5a4SJacob Faibussowitsch     case 2:
56d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
57d71ae5a4SJacob Faibussowitsch       break;
58d71ae5a4SJacob Faibussowitsch     case 3:
59d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
60d71ae5a4SJacob Faibussowitsch       break;
61d71ae5a4SJacob Faibussowitsch     case 4:
62d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
63d71ae5a4SJacob Faibussowitsch       break;
64d71ae5a4SJacob Faibussowitsch     case 5:
65d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
66d71ae5a4SJacob Faibussowitsch       break;
67d71ae5a4SJacob Faibussowitsch     case 6:
68d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
69d71ae5a4SJacob Faibussowitsch       break;
70d71ae5a4SJacob Faibussowitsch     case 7:
71d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
72d71ae5a4SJacob Faibussowitsch       break;
73d71ae5a4SJacob Faibussowitsch     default:
74d71ae5a4SJacob Faibussowitsch       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
75d71ae5a4SJacob Faibussowitsch       break;
76ae3d28f0SHong Zhang     }
776929473cSShri Abhyankar   }
783ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
79ae3d28f0SHong Zhang }
80ae3d28f0SHong Zhang 
81d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural)
82d71ae5a4SJacob Faibussowitsch {
83db4efbfdSBarry Smith   PetscFunctionBegin;
84db4efbfdSBarry Smith   if (natural) {
85db4efbfdSBarry Smith     switch (inA->rmap->bs) {
86d71ae5a4SJacob Faibussowitsch     case 1:
87d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
88d71ae5a4SJacob Faibussowitsch       break;
89d71ae5a4SJacob Faibussowitsch     case 2:
90d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
91d71ae5a4SJacob Faibussowitsch       break;
92d71ae5a4SJacob Faibussowitsch     case 3:
93d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
94d71ae5a4SJacob Faibussowitsch       break;
95db4efbfdSBarry Smith     case 4:
96ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE)
97db4efbfdSBarry Smith     {
98ace3abfcSBarry Smith       PetscBool sse_enabled_local;
999566063dSJacob Faibussowitsch       PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL));
100db4efbfdSBarry Smith       if (sse_enabled_local) {
101db4efbfdSBarry Smith   #if defined(PETSC_HAVE_SSE)
102db4efbfdSBarry Smith         int i, *AJ = a->j, nz = a->nz, n = a->mbs;
103db4efbfdSBarry Smith         if (n == (unsigned short)n) {
104db4efbfdSBarry Smith           unsigned short *aj = (unsigned short *)AJ;
10526fbe8dcSKarl Rupp           for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i];
10626fbe8dcSKarl Rupp 
107db4efbfdSBarry Smith           inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
108db4efbfdSBarry Smith           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
10926fbe8dcSKarl Rupp 
1109566063dSJacob Faibussowitsch           PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n"));
111db4efbfdSBarry Smith         } else {
112db4efbfdSBarry Smith           /* Scale the column indices for easier indexing in MatSolve. */
113db4efbfdSBarry Smith           /*            for (i=0;i<nz;i++) { */
114db4efbfdSBarry Smith           /*              AJ[i] = AJ[i]*4; */
115db4efbfdSBarry Smith           /*            } */
116db4efbfdSBarry Smith           inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
117db4efbfdSBarry Smith           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
11826fbe8dcSKarl Rupp 
1199566063dSJacob Faibussowitsch           PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n"));
120db4efbfdSBarry Smith         }
121db4efbfdSBarry Smith   #else
122db4efbfdSBarry Smith         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
123e32f2f54SBarry Smith         SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable");
124db4efbfdSBarry Smith   #endif
125db4efbfdSBarry Smith       } else {
12606e38f1dSHong Zhang         inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
127db4efbfdSBarry Smith       }
128db4efbfdSBarry Smith     }
129db4efbfdSBarry Smith #else
13006e38f1dSHong Zhang       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
131db4efbfdSBarry Smith #endif
132db4efbfdSBarry Smith     break;
133d71ae5a4SJacob Faibussowitsch     case 5:
134d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
135d71ae5a4SJacob Faibussowitsch       break;
136d71ae5a4SJacob Faibussowitsch     case 6:
137d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
138d71ae5a4SJacob Faibussowitsch       break;
139d71ae5a4SJacob Faibussowitsch     case 7:
140d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
141d71ae5a4SJacob Faibussowitsch       break;
142d71ae5a4SJacob Faibussowitsch     default:
143d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
144d71ae5a4SJacob Faibussowitsch       break;
145db4efbfdSBarry Smith     }
146db4efbfdSBarry Smith   } else {
147db4efbfdSBarry Smith     switch (inA->rmap->bs) {
148d71ae5a4SJacob Faibussowitsch     case 1:
149d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
150d71ae5a4SJacob Faibussowitsch       break;
151d71ae5a4SJacob Faibussowitsch     case 2:
152d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
153d71ae5a4SJacob Faibussowitsch       break;
154d71ae5a4SJacob Faibussowitsch     case 3:
155d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
156d71ae5a4SJacob Faibussowitsch       break;
157d71ae5a4SJacob Faibussowitsch     case 4:
158d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
159d71ae5a4SJacob Faibussowitsch       break;
160d71ae5a4SJacob Faibussowitsch     case 5:
161d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
162d71ae5a4SJacob Faibussowitsch       break;
163d71ae5a4SJacob Faibussowitsch     case 6:
164d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
165d71ae5a4SJacob Faibussowitsch       break;
166d71ae5a4SJacob Faibussowitsch     case 7:
167d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
168d71ae5a4SJacob Faibussowitsch       break;
169d71ae5a4SJacob Faibussowitsch     default:
170d71ae5a4SJacob Faibussowitsch       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
171d71ae5a4SJacob Faibussowitsch       break;
172db4efbfdSBarry Smith     }
173db4efbfdSBarry Smith   }
1743ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
175db4efbfdSBarry Smith }
176db4efbfdSBarry Smith 
17783287d42SBarry Smith /*
17883287d42SBarry Smith     The symbolic factorization code is identical to that for AIJ format,
17983287d42SBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
18083287d42SBarry Smith   NOT good code reuse.
18183287d42SBarry Smith */
182c6db04a5SJed Brown #include <petscbt.h>
183c6db04a5SJed Brown #include <../src/mat/utils/freespace.h>
184c7272abeSHong Zhang 
185d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
186d71ae5a4SJacob Faibussowitsch {
18783287d42SBarry Smith   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
188c7272abeSHong Zhang   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
189ace3abfcSBarry Smith   PetscBool          row_identity, col_identity, both_identity;
19083287d42SBarry Smith   IS                 isicol;
191c7272abeSHong Zhang   const PetscInt    *r, *ic;
192c7272abeSHong Zhang   PetscInt           i, *ai = a->i, *aj = a->j;
193c7272abeSHong Zhang   PetscInt          *bi, *bj, *ajtmp;
194c7272abeSHong Zhang   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
195c7272abeSHong Zhang   PetscReal          f;
196c7272abeSHong Zhang   PetscInt           nlnk, *lnk, k, **bi_ptr;
1970298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
198c7272abeSHong Zhang   PetscBT            lnkbt;
199ece542f9SHong Zhang   PetscBool          missing;
20083287d42SBarry Smith 
20183287d42SBarry Smith   PetscFunctionBegin;
20294bad497SJacob Faibussowitsch   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
2039566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
20494bad497SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
205ece542f9SHong Zhang 
2066ba06ab7SHong Zhang   if (bs > 1) { /* check shifttype */
20713bcc0bdSJacob Faibussowitsch     PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
2086ba06ab7SHong Zhang   }
2096ba06ab7SHong Zhang 
2109566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
2119566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
2129566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
21383287d42SBarry Smith 
214bc74c81fSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
2159566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
2169566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
217a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
218b2b2dd24SShri Abhyankar 
219b2b2dd24SShri Abhyankar   /* linked list for storing column indices of the active row */
220b2b2dd24SShri Abhyankar   nlnk = n + 1;
2219566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
222b2b2dd24SShri Abhyankar 
2239566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
224b2b2dd24SShri Abhyankar 
225b2b2dd24SShri Abhyankar   /* initial FreeSpace size is f*(ai[n]+1) */
226b2b2dd24SShri Abhyankar   f = info->fill;
2279566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
22826fbe8dcSKarl Rupp 
229b2b2dd24SShri Abhyankar   current_space = free_space;
230b2b2dd24SShri Abhyankar 
231b2b2dd24SShri Abhyankar   for (i = 0; i < n; i++) {
232b2b2dd24SShri Abhyankar     /* copy previous fill into linked list */
233b2b2dd24SShri Abhyankar     nzi   = 0;
234b2b2dd24SShri Abhyankar     nnz   = ai[r[i] + 1] - ai[r[i]];
235b2b2dd24SShri Abhyankar     ajtmp = aj + ai[r[i]];
2369566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
237b2b2dd24SShri Abhyankar     nzi += nlnk;
238b2b2dd24SShri Abhyankar 
239b2b2dd24SShri Abhyankar     /* add pivot rows into linked list */
240b2b2dd24SShri Abhyankar     row = lnk[n];
241b2b2dd24SShri Abhyankar     while (row < i) {
242b2b2dd24SShri Abhyankar       nzbd  = bdiag[row] + 1;     /* num of entries in the row with column index <= row */
243b2b2dd24SShri Abhyankar       ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
2449566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
245b2b2dd24SShri Abhyankar       nzi += nlnk;
246b2b2dd24SShri Abhyankar       row = lnk[row];
247b2b2dd24SShri Abhyankar     }
248b2b2dd24SShri Abhyankar     bi[i + 1] = bi[i] + nzi;
249b2b2dd24SShri Abhyankar     im[i]     = nzi;
250b2b2dd24SShri Abhyankar 
251b2b2dd24SShri Abhyankar     /* mark bdiag */
252b2b2dd24SShri Abhyankar     nzbd = 0;
253b2b2dd24SShri Abhyankar     nnz  = nzi;
254b2b2dd24SShri Abhyankar     k    = lnk[n];
255b2b2dd24SShri Abhyankar     while (nnz-- && k < i) {
256b2b2dd24SShri Abhyankar       nzbd++;
257b2b2dd24SShri Abhyankar       k = lnk[k];
258b2b2dd24SShri Abhyankar     }
2592ce24eb6SHong Zhang     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
260b2b2dd24SShri Abhyankar 
261b2b2dd24SShri Abhyankar     /* if free space is not available, make more free space */
262b2b2dd24SShri Abhyankar     if (current_space->local_remaining < nzi) {
263f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */
2649566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
265b2b2dd24SShri Abhyankar       reallocs++;
266b2b2dd24SShri Abhyankar     }
267b2b2dd24SShri Abhyankar 
268b2b2dd24SShri Abhyankar     /* copy data into free space, then initialize lnk */
2699566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
27026fbe8dcSKarl Rupp 
271b2b2dd24SShri Abhyankar     bi_ptr[i] = current_space->array;
272b2b2dd24SShri Abhyankar     current_space->array += nzi;
273b2b2dd24SShri Abhyankar     current_space->local_used += nzi;
274b2b2dd24SShri Abhyankar     current_space->local_remaining -= nzi;
275b2b2dd24SShri Abhyankar   }
276b2b2dd24SShri Abhyankar 
2779566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
2789566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
279b2b2dd24SShri Abhyankar 
2809263d837SHong Zhang   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
2819566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
2829566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag));
2839566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
2849566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
285b2b2dd24SShri Abhyankar 
286b2b2dd24SShri Abhyankar   /* put together the new matrix */
2879566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
288*57508eceSPierre Jolivet   b = (Mat_SeqBAIJ *)B->data;
28926fbe8dcSKarl Rupp 
290b2b2dd24SShri Abhyankar   b->free_ij = PETSC_TRUE;
2919f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a));
2929f0612e4SBarry Smith   b->free_a        = PETSC_TRUE;
293b2b2dd24SShri Abhyankar   b->j             = bj;
294b2b2dd24SShri Abhyankar   b->i             = bi;
295b2b2dd24SShri Abhyankar   b->diag          = bdiag;
296b2b2dd24SShri Abhyankar   b->free_diag     = PETSC_TRUE;
297f4259b30SLisandro Dalcin   b->ilen          = NULL;
298f4259b30SLisandro Dalcin   b->imax          = NULL;
299b2b2dd24SShri Abhyankar   b->row           = isrow;
300b2b2dd24SShri Abhyankar   b->col           = iscol;
301b2b2dd24SShri Abhyankar   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
30226fbe8dcSKarl Rupp 
3039566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
3049566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
305b2b2dd24SShri Abhyankar   b->icol = isicol;
3069566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
307b2b2dd24SShri Abhyankar 
308b2b2dd24SShri Abhyankar   b->maxnz = b->nz = bdiag[0] + 1;
30926fbe8dcSKarl Rupp 
310d5f3da31SBarry Smith   B->factortype            = MAT_FACTOR_LU;
311b2b2dd24SShri Abhyankar   B->info.factor_mallocs   = reallocs;
312b2b2dd24SShri Abhyankar   B->info.fill_ratio_given = f;
313b2b2dd24SShri Abhyankar 
314b2b2dd24SShri Abhyankar   if (ai[n] != 0) {
315b2b2dd24SShri Abhyankar     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]);
316b2b2dd24SShri Abhyankar   } else {
317b2b2dd24SShri Abhyankar     B->info.fill_ratio_needed = 0.0;
318b2b2dd24SShri Abhyankar   }
3199263d837SHong Zhang #if defined(PETSC_USE_INFO)
3209263d837SHong Zhang   if (ai[n] != 0) {
3219263d837SHong Zhang     PetscReal af = B->info.fill_ratio_needed;
3229566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
3239566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
3249566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
3259566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
3269263d837SHong Zhang   } else {
3279566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
3289263d837SHong Zhang   }
3299263d837SHong Zhang #endif
330b2b2dd24SShri Abhyankar 
3319566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
3329566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
33326fbe8dcSKarl Rupp 
334ace3abfcSBarry Smith   both_identity = (PetscBool)(row_identity && col_identity);
33526fbe8dcSKarl Rupp 
3369566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity));
3373ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
338b2b2dd24SShri Abhyankar }
339b2b2dd24SShri Abhyankar 
340ff6a9541SJacob Faibussowitsch #if 0
341ff6a9541SJacob Faibussowitsch // unused
342ff6a9541SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info)
343d71ae5a4SJacob Faibussowitsch {
344faca2338SShri Abhyankar   Mat_SeqBAIJ       *a = (Mat_SeqBAIJ *)A->data, *b;
345faca2338SShri Abhyankar   PetscInt           n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2;
346ace3abfcSBarry Smith   PetscBool          row_identity, col_identity, both_identity;
347faca2338SShri Abhyankar   IS                 isicol;
348faca2338SShri Abhyankar   const PetscInt    *r, *ic;
349faca2338SShri Abhyankar   PetscInt           i, *ai = a->i, *aj = a->j;
350faca2338SShri Abhyankar   PetscInt          *bi, *bj, *ajtmp;
351faca2338SShri Abhyankar   PetscInt          *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im;
352faca2338SShri Abhyankar   PetscReal          f;
353faca2338SShri Abhyankar   PetscInt           nlnk, *lnk, k, **bi_ptr;
3540298fd71SBarry Smith   PetscFreeSpaceList free_space = NULL, current_space = NULL;
355faca2338SShri Abhyankar   PetscBT            lnkbt;
356ece542f9SHong Zhang   PetscBool          missing;
357faca2338SShri Abhyankar 
358faca2338SShri Abhyankar   PetscFunctionBegin;
35994bad497SJacob Faibussowitsch   PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square");
3609566063dSJacob Faibussowitsch   PetscCall(MatMissingDiagonal(A, &missing, &i));
36194bad497SJacob Faibussowitsch   PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i);
362ece542f9SHong Zhang 
3639566063dSJacob Faibussowitsch   PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol));
3649566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isrow, &r));
3659566063dSJacob Faibussowitsch   PetscCall(ISGetIndices(isicol, &ic));
366faca2338SShri Abhyankar 
367bc74c81fSJed Brown   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
3689566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bi));
3699566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(n + 1, &bdiag));
370bc74c81fSJed Brown 
371a6df321fSShri Abhyankar   bi[0] = bdiag[0] = 0;
372c7272abeSHong Zhang 
373c7272abeSHong Zhang   /* linked list for storing column indices of the active row */
374c7272abeSHong Zhang   nlnk = n + 1;
3759566063dSJacob Faibussowitsch   PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt));
376c7272abeSHong Zhang 
3779566063dSJacob Faibussowitsch   PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im));
378c7272abeSHong Zhang 
379c7272abeSHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
380c7272abeSHong Zhang   f = info->fill;
3819566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space));
382c7272abeSHong Zhang   current_space = free_space;
38383287d42SBarry Smith 
38483287d42SBarry Smith   for (i = 0; i < n; i++) {
385c7272abeSHong Zhang     /* copy previous fill into linked list */
38683287d42SBarry Smith     nzi   = 0;
387c7272abeSHong Zhang     nnz   = ai[r[i] + 1] - ai[r[i]];
388c7272abeSHong Zhang     ajtmp = aj + ai[r[i]];
3899566063dSJacob Faibussowitsch     PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt));
390c7272abeSHong Zhang     nzi += nlnk;
391c7272abeSHong Zhang 
392c7272abeSHong Zhang     /* add pivot rows into linked list */
393c7272abeSHong Zhang     row = lnk[n];
394c7272abeSHong Zhang     while (row < i) {
395c7272abeSHong Zhang       nzbd  = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
396c7272abeSHong Zhang       ajtmp = bi_ptr[row] + nzbd;       /* points to the entry next to the diagonal */
3979566063dSJacob Faibussowitsch       PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im));
398c7272abeSHong Zhang       nzi += nlnk;
399c7272abeSHong Zhang       row = lnk[row];
40083287d42SBarry Smith     }
401c7272abeSHong Zhang     bi[i + 1] = bi[i] + nzi;
402c7272abeSHong Zhang     im[i]     = nzi;
403c7272abeSHong Zhang 
404c7272abeSHong Zhang     /* mark bdiag */
405c7272abeSHong Zhang     nzbd = 0;
406c7272abeSHong Zhang     nnz  = nzi;
407c7272abeSHong Zhang     k    = lnk[n];
408c7272abeSHong Zhang     while (nnz-- && k < i) {
409c7272abeSHong Zhang       nzbd++;
410c7272abeSHong Zhang       k = lnk[k];
411c7272abeSHong Zhang     }
412c7272abeSHong Zhang     bdiag[i] = bi[i] + nzbd;
413c7272abeSHong Zhang 
414c7272abeSHong Zhang     /* if free space is not available, make more free space */
415c7272abeSHong Zhang     if (current_space->local_remaining < nzi) {
416f91af8c7SBarry Smith       nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */
4179566063dSJacob Faibussowitsch       PetscCall(PetscFreeSpaceGet(nnz, &current_space));
418c7272abeSHong Zhang       reallocs++;
41983287d42SBarry Smith     }
42083287d42SBarry Smith 
421c7272abeSHong Zhang     /* copy data into free space, then initialize lnk */
4229566063dSJacob Faibussowitsch     PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt));
42326fbe8dcSKarl Rupp 
424c7272abeSHong Zhang     bi_ptr[i] = current_space->array;
425c7272abeSHong Zhang     current_space->array += nzi;
426c7272abeSHong Zhang     current_space->local_used += nzi;
427c7272abeSHong Zhang     current_space->local_remaining -= nzi;
428c7272abeSHong Zhang   }
4296cf91177SBarry Smith   #if defined(PETSC_USE_INFO)
43083287d42SBarry Smith   if (ai[n] != 0) {
431c7272abeSHong Zhang     PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
4329566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af));
4339566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af));
4349566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af));
4359566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "for best performance.\n"));
43683287d42SBarry Smith   } else {
4379566063dSJacob Faibussowitsch     PetscCall(PetscInfo(A, "Empty matrix\n"));
43883287d42SBarry Smith   }
43963ba0a88SBarry Smith   #endif
44083287d42SBarry Smith 
4419566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isrow, &r));
4429566063dSJacob Faibussowitsch   PetscCall(ISRestoreIndices(isicol, &ic));
44383287d42SBarry Smith 
444c7272abeSHong Zhang   /* destroy list of free space and other temporary array(s) */
4459566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bi[n] + 1, &bj));
4469566063dSJacob Faibussowitsch   PetscCall(PetscFreeSpaceContiguous(&free_space, bj));
4479566063dSJacob Faibussowitsch   PetscCall(PetscLLDestroy(lnk, lnkbt));
4489566063dSJacob Faibussowitsch   PetscCall(PetscFree2(bi_ptr, im));
44983287d42SBarry Smith 
45083287d42SBarry Smith   /* put together the new matrix */
4519566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL));
452*57508eceSPierre Jolivet   b = (Mat_SeqBAIJ *)B->data;
453e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
4549f0612e4SBarry Smith   PetscCall(PetscShmgetAllocateArray((bi[n] + 1) * bs2,,sizeof(PetscScalar),(void **)&b->a));
4559f0612e4SBarry Smith   b->free_a       = PETSC_TRUE;
456c7272abeSHong Zhang   b->j             = bj;
457c7272abeSHong Zhang   b->i             = bi;
458c7272abeSHong Zhang   b->diag          = bdiag;
4597f53bb6cSHong Zhang   b->free_diag     = PETSC_TRUE;
460f4259b30SLisandro Dalcin   b->ilen          = NULL;
461f4259b30SLisandro Dalcin   b->imax          = NULL;
46283287d42SBarry Smith   b->row           = isrow;
46383287d42SBarry Smith   b->col           = iscol;
464d3d32019SBarry Smith   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
46526fbe8dcSKarl Rupp 
4669566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)isrow));
4679566063dSJacob Faibussowitsch   PetscCall(PetscObjectReference((PetscObject)iscol));
46883287d42SBarry Smith   b->icol = isicol;
46926fbe8dcSKarl Rupp 
4709566063dSJacob Faibussowitsch   PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work));
47183287d42SBarry Smith 
472c7272abeSHong Zhang   b->maxnz = b->nz = bi[n];
47326fbe8dcSKarl Rupp 
474*57508eceSPierre Jolivet   B->factortype            = MAT_FACTOR_LU;
475*57508eceSPierre Jolivet   B->info.factor_mallocs   = reallocs;
476*57508eceSPierre Jolivet   B->info.fill_ratio_given = f;
477c7272abeSHong Zhang 
47883287d42SBarry Smith   if (ai[n] != 0) {
479*57508eceSPierre Jolivet     B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]);
48083287d42SBarry Smith   } else {
481*57508eceSPierre Jolivet     B->info.fill_ratio_needed = 0.0;
48283287d42SBarry Smith   }
483c7272abeSHong Zhang 
4849566063dSJacob Faibussowitsch   PetscCall(ISIdentity(isrow, &row_identity));
4859566063dSJacob Faibussowitsch   PetscCall(ISIdentity(iscol, &col_identity));
48626fbe8dcSKarl Rupp 
487ace3abfcSBarry Smith   both_identity = (PetscBool)(row_identity && col_identity);
48826fbe8dcSKarl Rupp 
4899566063dSJacob Faibussowitsch   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity));
4903ba16761SJacob Faibussowitsch   PetscFunctionReturn(PETSC_SUCCESS);
49183287d42SBarry Smith }
492ff6a9541SJacob Faibussowitsch #endif
493