xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision 503c0ea9b45bcfbcebbb1ea5341243bbc69f0bea)
1 
2 /*
3     Factorization code for BAIJ format.
4 */
5 #include <../src/mat/impls/baij/seq/baij.h>
6 #include <petsc/private/kernels/blockinvert.h>
7 
8 /*
9    This is used to set the numeric factorization for both LU and ILU symbolic factorization
10 */
11 PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
12 {
13   PetscFunctionBegin;
14   if (natural) {
15     switch (fact->rmap->bs) {
16     case 1:
17       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
18       break;
19     case 2:
20       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
21       break;
22     case 3:
23       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
24       break;
25     case 4:
26       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
27       break;
28     case 5:
29       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
30       break;
31     case 6:
32       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
33       break;
34     case 7:
35       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
36       break;
37     case 9:
38 #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
39       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
40 #else
41       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
42 #endif
43       break;
44     case 15:
45       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
46       break;
47     default:
48       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
49       break;
50     }
51   } else {
52     switch (fact->rmap->bs) {
53     case 1:
54       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
55       break;
56     case 2:
57       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
58       break;
59     case 3:
60       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
61       break;
62     case 4:
63       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
64       break;
65     case 5:
66       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
67       break;
68     case 6:
69       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
70       break;
71     case 7:
72       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
73       break;
74     default:
75       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
76       break;
77     }
78   }
79   PetscFunctionReturn(0);
80 }
81 
82 PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
83 {
84   PetscFunctionBegin;
85   if (natural) {
86     switch (inA->rmap->bs) {
87     case 1:
88       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
89       break;
90     case 2:
91       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
92       break;
93     case 3:
94       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
95       break;
96     case 4:
97 #if defined(PETSC_USE_REAL_MAT_SINGLE)
98       {
99         PetscBool sse_enabled_local;
100         PetscCall(PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL));
101         if (sse_enabled_local) {
102 #  if defined(PETSC_HAVE_SSE)
103           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
104           if (n==(unsigned short)n) {
105             unsigned short *aj=(unsigned short*)AJ;
106             for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];
107 
108             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
109             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
110 
111             PetscCall(PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n"));
112           } else {
113             /* Scale the column indices for easier indexing in MatSolve. */
114 /*            for (i=0;i<nz;i++) { */
115 /*              AJ[i] = AJ[i]*4; */
116 /*            } */
117             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
118             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
119 
120             PetscCall(PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n"));
121           }
122 #  else
123           /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
124           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
125 #  endif
126         } else {
127           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
128         }
129       }
130 #else
131       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
132 #endif
133       break;
134     case 5:
135       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
136       break;
137     case 6:
138       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
139       break;
140     case 7:
141       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
142       break;
143     default:
144       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
145       break;
146     }
147   } else {
148     switch (inA->rmap->bs) {
149     case 1:
150       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
151       break;
152     case 2:
153       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
154       break;
155     case 3:
156       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
157       break;
158     case 4:
159       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
160       break;
161     case 5:
162       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
163       break;
164     case 6:
165       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
166       break;
167     case 7:
168       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
169       break;
170     default:
171       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
172       break;
173     }
174   }
175   PetscFunctionReturn(0);
176 }
177 
178 /*
179     The symbolic factorization code is identical to that for AIJ format,
180   except for very small changes since this is now a SeqBAIJ datastructure.
181   NOT good code reuse.
182 */
183 #include <petscbt.h>
184 #include <../src/mat/utils/freespace.h>
185 
186 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
187 {
188   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
189   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
190   PetscBool          row_identity,col_identity,both_identity;
191   IS                 isicol;
192   const PetscInt     *r,*ic;
193   PetscInt           i,*ai=a->i,*aj=a->j;
194   PetscInt           *bi,*bj,*ajtmp;
195   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
196   PetscReal          f;
197   PetscInt           nlnk,*lnk,k,**bi_ptr;
198   PetscFreeSpaceList free_space=NULL,current_space=NULL;
199   PetscBT            lnkbt;
200   PetscBool          missing;
201 
202   PetscFunctionBegin;
203   PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
204   PetscCall(MatMissingDiagonal(A,&missing,&i));
205   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
206 
207   if (bs>1) {  /* check shifttype */
208     PetscCheckFalse(info->shifttype == MAT_SHIFT_NONZERO || info->shifttype == MAT_SHIFT_POSITIVE_DEFINITE,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix");
209   }
210 
211   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
212   PetscCall(ISGetIndices(isrow,&r));
213   PetscCall(ISGetIndices(isicol,&ic));
214 
215   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
216   PetscCall(PetscMalloc1(n+1,&bi));
217   PetscCall(PetscMalloc1(n+1,&bdiag));
218   bi[0] = bdiag[0] = 0;
219 
220   /* linked list for storing column indices of the active row */
221   nlnk = n + 1;
222   PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt));
223 
224   PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im));
225 
226   /* initial FreeSpace size is f*(ai[n]+1) */
227   f    = info->fill;
228   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space));
229 
230   current_space = free_space;
231 
232   for (i=0; i<n; i++) {
233     /* copy previous fill into linked list */
234     nzi = 0;
235     nnz = ai[r[i]+1] - ai[r[i]];
236     ajtmp = aj + ai[r[i]];
237     PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt));
238     nzi  += nlnk;
239 
240     /* add pivot rows into linked list */
241     row = lnk[n];
242     while (row < i) {
243       nzbd  = bdiag[row] + 1;   /* num of entries in the row with column index <= row */
244       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
245       PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im));
246       nzi  += nlnk;
247       row   = lnk[row];
248     }
249     bi[i+1] = bi[i] + nzi;
250     im[i]   = nzi;
251 
252     /* mark bdiag */
253     nzbd = 0;
254     nnz  = nzi;
255     k    = lnk[n];
256     while (nnz-- && k < i) {
257       nzbd++;
258       k = lnk[k];
259     }
260     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */
261 
262     /* if free space is not available, make more free space */
263     if (current_space->local_remaining<nzi) {
264       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */
265       PetscCall(PetscFreeSpaceGet(nnz,&current_space));
266       reallocs++;
267     }
268 
269     /* copy data into free space, then initialize lnk */
270     PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt));
271 
272     bi_ptr[i]                       = current_space->array;
273     current_space->array           += nzi;
274     current_space->local_used      += nzi;
275     current_space->local_remaining -= nzi;
276   }
277 
278   PetscCall(ISRestoreIndices(isrow,&r));
279   PetscCall(ISRestoreIndices(isicol,&ic));
280 
281   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
282   PetscCall(PetscMalloc1(bi[n]+1,&bj));
283   PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag));
284   PetscCall(PetscLLDestroy(lnk,lnkbt));
285   PetscCall(PetscFree2(bi_ptr,im));
286 
287   /* put together the new matrix */
288   PetscCall(MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL));
289   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol));
290   b    = (Mat_SeqBAIJ*)(B)->data;
291 
292   b->free_a       = PETSC_TRUE;
293   b->free_ij      = PETSC_TRUE;
294   b->singlemalloc = PETSC_FALSE;
295 
296   PetscCall(PetscMalloc1((bdiag[0]+1)*bs2,&b->a));
297   b->j             = bj;
298   b->i             = bi;
299   b->diag          = bdiag;
300   b->free_diag     = PETSC_TRUE;
301   b->ilen          = NULL;
302   b->imax          = NULL;
303   b->row           = isrow;
304   b->col           = iscol;
305   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
306 
307   PetscCall(PetscObjectReference((PetscObject)isrow));
308   PetscCall(PetscObjectReference((PetscObject)iscol));
309   b->icol = isicol;
310   PetscCall(PetscMalloc1(bs*n+bs,&b->solve_work));
311   PetscCall(PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2)));
312 
313   b->maxnz = b->nz = bdiag[0]+1;
314 
315   B->factortype            =  MAT_FACTOR_LU;
316   B->info.factor_mallocs   = reallocs;
317   B->info.fill_ratio_given = f;
318 
319   if (ai[n] != 0) {
320     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
321   } else {
322     B->info.fill_ratio_needed = 0.0;
323   }
324 #if defined(PETSC_USE_INFO)
325   if (ai[n] != 0) {
326     PetscReal af = B->info.fill_ratio_needed;
327     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
328     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
329     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af));
330     PetscCall(PetscInfo(A,"for best performance.\n"));
331   } else {
332     PetscCall(PetscInfo(A,"Empty matrix\n"));
333   }
334 #endif
335 
336   PetscCall(ISIdentity(isrow,&row_identity));
337   PetscCall(ISIdentity(iscol,&col_identity));
338 
339   both_identity = (PetscBool) (row_identity && col_identity);
340 
341   PetscCall(MatSeqBAIJSetNumericFactorization(B,both_identity));
342   PetscFunctionReturn(0);
343 }
344 
345 PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
346 {
347   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
348   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
349   PetscBool          row_identity,col_identity,both_identity;
350   IS                 isicol;
351   const PetscInt     *r,*ic;
352   PetscInt           i,*ai=a->i,*aj=a->j;
353   PetscInt           *bi,*bj,*ajtmp;
354   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
355   PetscReal          f;
356   PetscInt           nlnk,*lnk,k,**bi_ptr;
357   PetscFreeSpaceList free_space=NULL,current_space=NULL;
358   PetscBT            lnkbt;
359   PetscBool          missing;
360 
361   PetscFunctionBegin;
362   PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square");
363   PetscCall(MatMissingDiagonal(A,&missing,&i));
364   PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i);
365 
366   PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol));
367   PetscCall(ISGetIndices(isrow,&r));
368   PetscCall(ISGetIndices(isicol,&ic));
369 
370   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
371   PetscCall(PetscMalloc1(n+1,&bi));
372   PetscCall(PetscMalloc1(n+1,&bdiag));
373 
374   bi[0] = bdiag[0] = 0;
375 
376   /* linked list for storing column indices of the active row */
377   nlnk = n + 1;
378   PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt));
379 
380   PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im));
381 
382   /* initial FreeSpace size is f*(ai[n]+1) */
383   f             = info->fill;
384   PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space));
385   current_space = free_space;
386 
387   for (i=0; i<n; i++) {
388     /* copy previous fill into linked list */
389     nzi = 0;
390     nnz = ai[r[i]+1] - ai[r[i]];
391     ajtmp = aj + ai[r[i]];
392     PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt));
393     nzi  += nlnk;
394 
395     /* add pivot rows into linked list */
396     row = lnk[n];
397     while (row < i) {
398       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
399       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
400       PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im));
401       nzi  += nlnk;
402       row   = lnk[row];
403     }
404     bi[i+1] = bi[i] + nzi;
405     im[i]   = nzi;
406 
407     /* mark bdiag */
408     nzbd = 0;
409     nnz  = nzi;
410     k    = lnk[n];
411     while (nnz-- && k < i) {
412       nzbd++;
413       k = lnk[k];
414     }
415     bdiag[i] = bi[i] + nzbd;
416 
417     /* if free space is not available, make more free space */
418     if (current_space->local_remaining<nzi) {
419       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
420       PetscCall(PetscFreeSpaceGet(nnz,&current_space));
421       reallocs++;
422     }
423 
424     /* copy data into free space, then initialize lnk */
425     PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt));
426 
427     bi_ptr[i]                       = current_space->array;
428     current_space->array           += nzi;
429     current_space->local_used      += nzi;
430     current_space->local_remaining -= nzi;
431   }
432 #if defined(PETSC_USE_INFO)
433   if (ai[n] != 0) {
434     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
435     PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af));
436     PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af));
437     PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af));
438     PetscCall(PetscInfo(A,"for best performance.\n"));
439   } else {
440     PetscCall(PetscInfo(A,"Empty matrix\n"));
441   }
442 #endif
443 
444   PetscCall(ISRestoreIndices(isrow,&r));
445   PetscCall(ISRestoreIndices(isicol,&ic));
446 
447   /* destroy list of free space and other temporary array(s) */
448   PetscCall(PetscMalloc1(bi[n]+1,&bj));
449   PetscCall(PetscFreeSpaceContiguous(&free_space,bj));
450   PetscCall(PetscLLDestroy(lnk,lnkbt));
451   PetscCall(PetscFree2(bi_ptr,im));
452 
453   /* put together the new matrix */
454   PetscCall(MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL));
455   PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol));
456   b    = (Mat_SeqBAIJ*)(B)->data;
457 
458   b->free_a       = PETSC_TRUE;
459   b->free_ij      = PETSC_TRUE;
460   b->singlemalloc = PETSC_FALSE;
461 
462   PetscCall(PetscMalloc1((bi[n]+1)*bs2,&b->a));
463   b->j             = bj;
464   b->i             = bi;
465   b->diag          = bdiag;
466   b->free_diag     = PETSC_TRUE;
467   b->ilen          = NULL;
468   b->imax          = NULL;
469   b->row           = isrow;
470   b->col           = iscol;
471   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
472 
473   PetscCall(PetscObjectReference((PetscObject)isrow));
474   PetscCall(PetscObjectReference((PetscObject)iscol));
475   b->icol = isicol;
476 
477   PetscCall(PetscMalloc1(bs*n+bs,&b->solve_work));
478   PetscCall(PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2)));
479 
480   b->maxnz = b->nz = bi[n];
481 
482   (B)->factortype            =  MAT_FACTOR_LU;
483   (B)->info.factor_mallocs   = reallocs;
484   (B)->info.fill_ratio_given = f;
485 
486   if (ai[n] != 0) {
487     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
488   } else {
489     (B)->info.fill_ratio_needed = 0.0;
490   }
491 
492   PetscCall(ISIdentity(isrow,&row_identity));
493   PetscCall(ISIdentity(iscol,&col_identity));
494 
495   both_identity = (PetscBool) (row_identity && col_identity);
496 
497   PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B,both_identity));
498   PetscFunctionReturn(0);
499 }
500