xref: /petsc/src/mat/impls/baij/seq/baijfact3.c (revision c60f02097ec1bad1d486ea4e6635560ad6843df9)
1be1d678aSKris Buschelman #define PETSCMAT_DLL
2be1d678aSKris Buschelman 
383287d42SBarry Smith /*
483287d42SBarry Smith     Factorization code for BAIJ format.
583287d42SBarry Smith */
67c4f633dSBarry Smith #include "../src/mat/impls/baij/seq/baij.h"
7*c60f0209SBarry Smith #include "../src/mat/blockinvert.h"
883287d42SBarry Smith 
9db4efbfdSBarry Smith #undef __FUNCT__
10db4efbfdSBarry Smith #define __FUNCT__ "MatSeqBAIJSetNumericFactorization"
11db4efbfdSBarry Smith /*
12db4efbfdSBarry Smith    This is used to set the numeric factorization for both LU and ILU symbolic factorization
13db4efbfdSBarry Smith */
14db4efbfdSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat inA,PetscTruth natural)
15db4efbfdSBarry Smith {
16db4efbfdSBarry Smith   PetscFunctionBegin;
17db4efbfdSBarry Smith   if (natural) {
18db4efbfdSBarry Smith     switch (inA->rmap->bs) {
19db4efbfdSBarry Smith     case 1:
20db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
21db4efbfdSBarry Smith       break;
22db4efbfdSBarry Smith     case 2:
23db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
24db4efbfdSBarry Smith       break;
25db4efbfdSBarry Smith     case 3:
26db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
27db4efbfdSBarry Smith       break;
28db4efbfdSBarry Smith     case 4:
29db4efbfdSBarry Smith #if defined(PETSC_USE_MAT_SINGLE)
30db4efbfdSBarry Smith       {
31db4efbfdSBarry Smith         PetscTruth  sse_enabled_local;
32db4efbfdSBarry Smith         PetscErrorCode ierr;
33db4efbfdSBarry Smith         ierr = PetscSSEIsEnabled(inA->comm,&sse_enabled_local,PETSC_NULL);CHKERRQ(ierr);
34db4efbfdSBarry Smith         if (sse_enabled_local) {
35db4efbfdSBarry Smith #  if defined(PETSC_HAVE_SSE)
36db4efbfdSBarry Smith           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
37db4efbfdSBarry Smith           if (n==(unsigned short)n) {
38db4efbfdSBarry Smith             unsigned short *aj=(unsigned short *)AJ;
39db4efbfdSBarry Smith             for (i=0;i<nz;i++) {
40db4efbfdSBarry Smith               aj[i] = (unsigned short)AJ[i];
41db4efbfdSBarry Smith             }
42db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
43db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;
44db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");CHKERRQ(ierr);
45db4efbfdSBarry Smith           } else {
46db4efbfdSBarry Smith         /* Scale the column indices for easier indexing in MatSolve. */
47db4efbfdSBarry Smith /*            for (i=0;i<nz;i++) { */
48db4efbfdSBarry Smith /*              AJ[i] = AJ[i]*4; */
49db4efbfdSBarry Smith /*            } */
50db4efbfdSBarry Smith             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
51db4efbfdSBarry Smith             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;
52db4efbfdSBarry Smith             ierr = PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");CHKERRQ(ierr);
53db4efbfdSBarry Smith           }
54db4efbfdSBarry Smith #  else
55db4efbfdSBarry Smith         /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
56db4efbfdSBarry Smith           SETERRQ(PETSC_ERR_SUP,"SSE Hardware unavailable");
57db4efbfdSBarry Smith #  endif
58db4efbfdSBarry Smith         } else {
59db4efbfdSBarry Smith           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
60db4efbfdSBarry Smith         }
61db4efbfdSBarry Smith       }
62db4efbfdSBarry Smith #else
63db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
64db4efbfdSBarry Smith #endif
65db4efbfdSBarry Smith       break;
66db4efbfdSBarry Smith     case 5:
67db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
68db4efbfdSBarry Smith       break;
69db4efbfdSBarry Smith     case 6:
70db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
71db4efbfdSBarry Smith       break;
72db4efbfdSBarry Smith     case 7:
73db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
74db4efbfdSBarry Smith       break;
75db4efbfdSBarry Smith     default:
76db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
77db4efbfdSBarry Smith       break;
78db4efbfdSBarry Smith     }
79db4efbfdSBarry Smith   } else {
80db4efbfdSBarry Smith     switch (inA->rmap->bs) {
81db4efbfdSBarry Smith     case 1:
82db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
83db4efbfdSBarry Smith       break;
84db4efbfdSBarry Smith     case 2:
85db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
86db4efbfdSBarry Smith       break;
87db4efbfdSBarry Smith     case 3:
88db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
89db4efbfdSBarry Smith       break;
90db4efbfdSBarry Smith     case 4:
91db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
92db4efbfdSBarry Smith       break;
93db4efbfdSBarry Smith     case 5:
94db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
95db4efbfdSBarry Smith       break;
96db4efbfdSBarry Smith     case 6:
97db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
98db4efbfdSBarry Smith       break;
99db4efbfdSBarry Smith     case 7:
100db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
101db4efbfdSBarry Smith       break;
102db4efbfdSBarry Smith     default:
103db4efbfdSBarry Smith       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
104db4efbfdSBarry Smith       break;
105db4efbfdSBarry Smith     }
106db4efbfdSBarry Smith   }
107db4efbfdSBarry Smith   PetscFunctionReturn(0);
108db4efbfdSBarry Smith }
109db4efbfdSBarry Smith 
11083287d42SBarry Smith /*
11183287d42SBarry Smith     The symbolic factorization code is identical to that for AIJ format,
11283287d42SBarry Smith   except for very small changes since this is now a SeqBAIJ datastructure.
11383287d42SBarry Smith   NOT good code reuse.
11483287d42SBarry Smith */
115c7272abeSHong Zhang #include "petscbt.h"
116c7272abeSHong Zhang #include "../src/mat/utils/freespace.h"
117c7272abeSHong Zhang 
1184a2ae208SSatish Balay #undef __FUNCT__
1194a2ae208SSatish Balay #define __FUNCT__ "MatLUFactorSymbolic_SeqBAIJ"
1200481f469SBarry Smith PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
12183287d42SBarry Smith {
12283287d42SBarry Smith   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
123c7272abeSHong Zhang   PetscInt           n=a->mbs,bs = A->rmap->bs,bs2=a->bs2;
124c7272abeSHong Zhang   PetscTruth         row_identity,col_identity,both_identity;
12583287d42SBarry Smith   IS                 isicol;
1266849ba73SBarry Smith   PetscErrorCode     ierr;
127c7272abeSHong Zhang   const PetscInt     *r,*ic;
128c7272abeSHong Zhang   PetscInt           i,*ai=a->i,*aj=a->j;
129c7272abeSHong Zhang   PetscInt           *bi,*bj,*ajtmp;
130c7272abeSHong Zhang   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
131c7272abeSHong Zhang   PetscReal          f;
132c7272abeSHong Zhang   PetscInt           nlnk,*lnk,k,**bi_ptr;
133c7272abeSHong Zhang   PetscFreeSpaceList free_space=PETSC_NULL,current_space=PETSC_NULL;
134c7272abeSHong Zhang   PetscBT            lnkbt;
13583287d42SBarry Smith 
13683287d42SBarry Smith   PetscFunctionBegin;
137d0f46423SBarry Smith   if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_ERR_ARG_WRONG,"matrix must be square");
13883287d42SBarry Smith   ierr = ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);CHKERRQ(ierr);
13983287d42SBarry Smith   ierr = ISGetIndices(isrow,&r);CHKERRQ(ierr);
14083287d42SBarry Smith   ierr = ISGetIndices(isicol,&ic);CHKERRQ(ierr);
14183287d42SBarry Smith 
14283287d42SBarry Smith   /* get new row pointers */
143c7272abeSHong Zhang   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bi);CHKERRQ(ierr);
144c7272abeSHong Zhang   bi[0] = 0;
145c7272abeSHong Zhang 
146c7272abeSHong Zhang   /* bdiag is location of diagonal in factor */
147c7272abeSHong Zhang   ierr = PetscMalloc((n+1)*sizeof(PetscInt),&bdiag);CHKERRQ(ierr);
148c7272abeSHong Zhang   bdiag[0] = 0;
149c7272abeSHong Zhang 
150c7272abeSHong Zhang   /* linked list for storing column indices of the active row */
151c7272abeSHong Zhang   nlnk = n + 1;
152c7272abeSHong Zhang   ierr = PetscLLCreate(n,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
153c7272abeSHong Zhang 
154c7272abeSHong Zhang   ierr = PetscMalloc2(n+1,PetscInt**,&bi_ptr,n+1,PetscInt,&im);CHKERRQ(ierr);
155c7272abeSHong Zhang 
156c7272abeSHong Zhang   /* initial FreeSpace size is f*(ai[n]+1) */
157c7272abeSHong Zhang   f = info->fill;
158c7272abeSHong Zhang   ierr = PetscFreeSpaceGet((PetscInt)(f*(ai[n]+1)),&free_space);CHKERRQ(ierr);
159c7272abeSHong Zhang   current_space = free_space;
16083287d42SBarry Smith 
16183287d42SBarry Smith   for (i=0; i<n; i++) {
162c7272abeSHong Zhang     /* copy previous fill into linked list */
16383287d42SBarry Smith     nzi = 0;
164c7272abeSHong Zhang     nnz = ai[r[i]+1] - ai[r[i]];
165c7272abeSHong Zhang     if (!nnz) SETERRQ2(PETSC_ERR_MAT_LU_ZRPVT,"Empty row in matrix: row in original ordering %D in permuted ordering %D",r[i],i);
166c7272abeSHong Zhang     ajtmp = aj + ai[r[i]];
167c7272abeSHong Zhang     ierr = PetscLLAddPerm(nnz,ajtmp,ic,n,nlnk,lnk,lnkbt);CHKERRQ(ierr);
168c7272abeSHong Zhang     nzi += nlnk;
169c7272abeSHong Zhang 
170c7272abeSHong Zhang     /* add pivot rows into linked list */
171c7272abeSHong Zhang     row = lnk[n];
172c7272abeSHong Zhang     while (row < i) {
173c7272abeSHong Zhang       nzbd    = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */
174c7272abeSHong Zhang       ajtmp   = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */
175c7272abeSHong Zhang       ierr = PetscLLAddSortedLU(ajtmp,row,nlnk,lnk,lnkbt,i,nzbd,im);CHKERRQ(ierr);
176c7272abeSHong Zhang       nzi += nlnk;
177c7272abeSHong Zhang       row  = lnk[row];
17883287d42SBarry Smith     }
179c7272abeSHong Zhang     bi[i+1] = bi[i] + nzi;
180c7272abeSHong Zhang     im[i]   = nzi;
181c7272abeSHong Zhang 
182c7272abeSHong Zhang     /* mark bdiag */
183c7272abeSHong Zhang     nzbd = 0;
184c7272abeSHong Zhang     nnz  = nzi;
185c7272abeSHong Zhang     k    = lnk[n];
186c7272abeSHong Zhang     while (nnz-- && k < i){
187c7272abeSHong Zhang       nzbd++;
188c7272abeSHong Zhang       k = lnk[k];
189c7272abeSHong Zhang     }
190c7272abeSHong Zhang     bdiag[i] = bi[i] + nzbd;
191c7272abeSHong Zhang 
192c7272abeSHong Zhang     /* if free space is not available, make more free space */
193c7272abeSHong Zhang     if (current_space->local_remaining<nzi) {
194c7272abeSHong Zhang       nnz = (n - i)*nzi; /* estimated and max additional space needed */
195c7272abeSHong Zhang       ierr = PetscFreeSpaceGet(nnz,&current_space);CHKERRQ(ierr);
196c7272abeSHong Zhang       reallocs++;
19783287d42SBarry Smith     }
19883287d42SBarry Smith 
199c7272abeSHong Zhang     /* copy data into free space, then initialize lnk */
200c7272abeSHong Zhang     ierr = PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);CHKERRQ(ierr);
201c7272abeSHong Zhang     bi_ptr[i] = current_space->array;
202c7272abeSHong Zhang     current_space->array           += nzi;
203c7272abeSHong Zhang     current_space->local_used      += nzi;
204c7272abeSHong Zhang     current_space->local_remaining -= nzi;
205c7272abeSHong Zhang   }
2066cf91177SBarry Smith #if defined(PETSC_USE_INFO)
20783287d42SBarry Smith   if (ai[n] != 0) {
208c7272abeSHong Zhang     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
209ae15b995SBarry Smith     ierr = PetscInfo3(A,"Reallocs %D Fill ratio:given %G needed %G\n",reallocs,f,af);CHKERRQ(ierr);
210ae15b995SBarry Smith     ierr = PetscInfo1(A,"Run with -pc_factor_fill %G or use \n",af);CHKERRQ(ierr);
211ae15b995SBarry Smith     ierr = PetscInfo1(A,"PCFactorSetFill(pc,%G);\n",af);CHKERRQ(ierr);
212ae15b995SBarry Smith     ierr = PetscInfo(A,"for best performance.\n");CHKERRQ(ierr);
21383287d42SBarry Smith   } else {
214c7272abeSHong Zhang     ierr = PetscInfo(A,"Empty matrix\n");CHKERRQ(ierr);
21583287d42SBarry Smith   }
21663ba0a88SBarry Smith #endif
21783287d42SBarry Smith 
21883287d42SBarry Smith   ierr = ISRestoreIndices(isrow,&r);CHKERRQ(ierr);
21983287d42SBarry Smith   ierr = ISRestoreIndices(isicol,&ic);CHKERRQ(ierr);
22083287d42SBarry Smith 
221c7272abeSHong Zhang   /* destroy list of free space and other temporary array(s) */
222c7272abeSHong Zhang   ierr = PetscMalloc((bi[n]+1)*sizeof(PetscInt),&bj);CHKERRQ(ierr);
223c7272abeSHong Zhang   ierr = PetscFreeSpaceContiguous(&free_space,bj);CHKERRQ(ierr);
224c7272abeSHong Zhang   ierr = PetscLLDestroy(lnk,lnkbt);CHKERRQ(ierr);
225c7272abeSHong Zhang   ierr = PetscFree2(bi_ptr,im);CHKERRQ(ierr);
22683287d42SBarry Smith 
22783287d42SBarry Smith   /* put together the new matrix */
228719d5645SBarry Smith   ierr = MatSeqBAIJSetPreallocation_SeqBAIJ(B,bs,MAT_SKIP_ALLOCATION,PETSC_NULL);CHKERRQ(ierr);
229719d5645SBarry Smith   ierr = PetscLogObjectParent(B,isicol);CHKERRQ(ierr);
230719d5645SBarry Smith   b    = (Mat_SeqBAIJ*)(B)->data;
231e6b907acSBarry Smith   b->free_a       = PETSC_TRUE;
232e6b907acSBarry Smith   b->free_ij      = PETSC_TRUE;
233c7272abeSHong Zhang   b->singlemalloc = PETSC_FALSE;
234c7272abeSHong Zhang   ierr          = PetscMalloc((bi[n]+1)*sizeof(MatScalar)*bs2,&b->a);CHKERRQ(ierr);
235c7272abeSHong Zhang   b->j          = bj;
236c7272abeSHong Zhang   b->i          = bi;
237c7272abeSHong Zhang   b->diag       = bdiag;
23883287d42SBarry Smith   b->ilen       = 0;
23983287d42SBarry Smith   b->imax       = 0;
24083287d42SBarry Smith   b->row        = isrow;
24183287d42SBarry Smith   b->col        = iscol;
242d3d32019SBarry Smith   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;
24383287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr);
24483287d42SBarry Smith   ierr          = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr);
24583287d42SBarry Smith   b->icol       = isicol;
24687828ca2SBarry Smith   ierr = PetscMalloc((bs*n+bs)*sizeof(PetscScalar),&b->solve_work);CHKERRQ(ierr);
247c7272abeSHong Zhang   ierr = PetscLogObjectMemory(B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));CHKERRQ(ierr);
24883287d42SBarry Smith 
249c7272abeSHong Zhang   b->maxnz = b->nz = bi[n] ;
250c7272abeSHong Zhang   (B)->factor                =  MAT_FACTOR_LU;
251719d5645SBarry Smith   (B)->info.factor_mallocs   = reallocs;
252719d5645SBarry Smith   (B)->info.fill_ratio_given = f;
253c7272abeSHong Zhang 
25483287d42SBarry Smith   if (ai[n] != 0) {
255c7272abeSHong Zhang     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
25683287d42SBarry Smith   } else {
257719d5645SBarry Smith     (B)->info.fill_ratio_needed = 0.0;
25883287d42SBarry Smith   }
259c7272abeSHong Zhang 
260db4efbfdSBarry Smith   ierr = ISIdentity(isrow,&row_identity);CHKERRQ(ierr);
261db4efbfdSBarry Smith   ierr = ISIdentity(iscol,&col_identity);CHKERRQ(ierr);
2627d18ce8fSMatthew Knepley   both_identity = (PetscTruth) (row_identity && col_identity);
26341df41f0SMatthew Knepley   ierr = MatSeqBAIJSetNumericFactorization(B,both_identity);CHKERRQ(ierr);
26483287d42SBarry Smith   PetscFunctionReturn(0);
26583287d42SBarry Smith  }
266db4efbfdSBarry Smith 
267