1be1d678aSKris Buschelman 283287d42SBarry Smith /* 383287d42SBarry Smith Factorization code for BAIJ format. 483287d42SBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> 6af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 783287d42SBarry Smith 8db4efbfdSBarry Smith /* 9db4efbfdSBarry Smith This is used to set the numeric factorization for both LU and ILU symbolic factorization 10db4efbfdSBarry Smith */ 11ace3abfcSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural) 12ae3d28f0SHong Zhang { 13ae3d28f0SHong Zhang PetscFunctionBegin; 146929473cSShri Abhyankar if (natural) { 156929473cSShri Abhyankar switch (fact->rmap->bs) { 16048b5e81SShri Abhyankar case 1: 17048b5e81SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 18048b5e81SShri Abhyankar break; 196929473cSShri Abhyankar case 2: 204dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 216929473cSShri Abhyankar break; 226929473cSShri Abhyankar case 3: 234dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 246929473cSShri Abhyankar break; 256929473cSShri Abhyankar case 4: 264dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 276929473cSShri Abhyankar break; 286929473cSShri Abhyankar case 5: 294dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 306929473cSShri Abhyankar break; 316929473cSShri Abhyankar case 6: 324dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 336929473cSShri Abhyankar break; 346929473cSShri Abhyankar case 7: 354dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 366929473cSShri Abhyankar break; 3796e086a2SDaniel Kokron case 9: 385f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3996e086a2SDaniel Kokron fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 40aee5c371SBarry Smith #else 41aee5c371SBarry Smith fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 42aee5c371SBarry Smith #endif 4396e086a2SDaniel Kokron break; 4429a97285SShri Abhyankar case 15: 4529a97285SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 4629a97285SShri Abhyankar break; 476929473cSShri Abhyankar default: 484dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 496929473cSShri Abhyankar break; 506929473cSShri Abhyankar } 516ba06ab7SHong Zhang } else { 52ae3d28f0SHong Zhang switch (fact->rmap->bs) { 53048b5e81SShri Abhyankar case 1: 54048b5e81SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 55048b5e81SShri Abhyankar break; 56ae3d28f0SHong Zhang case 2: 574dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 58ae3d28f0SHong Zhang break; 59ae3d28f0SHong Zhang case 3: 604dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 61ae3d28f0SHong Zhang break; 62ae3d28f0SHong Zhang case 4: 634dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 64ae3d28f0SHong Zhang break; 65ae3d28f0SHong Zhang case 5: 664dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 67ae3d28f0SHong Zhang break; 68ae3d28f0SHong Zhang case 6: 694dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 70ae3d28f0SHong Zhang break; 71ae3d28f0SHong Zhang case 7: 724dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 73ae3d28f0SHong Zhang break; 74ae3d28f0SHong Zhang default: 754dd39f65SShri Abhyankar fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 76ae3d28f0SHong Zhang break; 77ae3d28f0SHong Zhang } 786929473cSShri Abhyankar } 79ae3d28f0SHong Zhang PetscFunctionReturn(0); 80ae3d28f0SHong Zhang } 81ae3d28f0SHong Zhang 82ace3abfcSBarry Smith PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural) 83db4efbfdSBarry Smith { 84db4efbfdSBarry Smith PetscFunctionBegin; 85db4efbfdSBarry Smith if (natural) { 86db4efbfdSBarry Smith switch (inA->rmap->bs) { 87db4efbfdSBarry Smith case 1: 8806e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 89db4efbfdSBarry Smith break; 90db4efbfdSBarry Smith case 2: 9106e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 92db4efbfdSBarry Smith break; 93db4efbfdSBarry Smith case 3: 9406e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 95db4efbfdSBarry Smith break; 96db4efbfdSBarry Smith case 4: 97ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 98db4efbfdSBarry Smith { 99ace3abfcSBarry Smith PetscBool sse_enabled_local; 1009566063dSJacob Faibussowitsch PetscCall(PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL)); 101db4efbfdSBarry Smith if (sse_enabled_local) { 102db4efbfdSBarry Smith # if defined(PETSC_HAVE_SSE) 103db4efbfdSBarry Smith int i,*AJ=a->j,nz=a->nz,n=a->mbs; 104db4efbfdSBarry Smith if (n==(unsigned short)n) { 105db4efbfdSBarry Smith unsigned short *aj=(unsigned short*)AJ; 10626fbe8dcSKarl Rupp for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i]; 10726fbe8dcSKarl Rupp 108db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 109db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 11026fbe8dcSKarl Rupp 1119566063dSJacob Faibussowitsch PetscCall(PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n")); 112db4efbfdSBarry Smith } else { 113db4efbfdSBarry Smith /* Scale the column indices for easier indexing in MatSolve. */ 114db4efbfdSBarry Smith /* for (i=0;i<nz;i++) { */ 115db4efbfdSBarry Smith /* AJ[i] = AJ[i]*4; */ 116db4efbfdSBarry Smith /* } */ 117db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 118db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 11926fbe8dcSKarl Rupp 1209566063dSJacob Faibussowitsch PetscCall(PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n")); 121db4efbfdSBarry Smith } 122db4efbfdSBarry Smith # else 123db4efbfdSBarry Smith /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 124e32f2f54SBarry Smith SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable"); 125db4efbfdSBarry Smith # endif 126db4efbfdSBarry Smith } else { 12706e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 128db4efbfdSBarry Smith } 129db4efbfdSBarry Smith } 130db4efbfdSBarry Smith #else 13106e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 132db4efbfdSBarry Smith #endif 133db4efbfdSBarry Smith break; 134db4efbfdSBarry Smith case 5: 13506e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 136db4efbfdSBarry Smith break; 137db4efbfdSBarry Smith case 6: 13806e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 139db4efbfdSBarry Smith break; 140db4efbfdSBarry Smith case 7: 14106e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 142db4efbfdSBarry Smith break; 143db4efbfdSBarry Smith default: 14406e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 145db4efbfdSBarry Smith break; 146db4efbfdSBarry Smith } 147db4efbfdSBarry Smith } else { 148db4efbfdSBarry Smith switch (inA->rmap->bs) { 149db4efbfdSBarry Smith case 1: 15006e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 151db4efbfdSBarry Smith break; 152db4efbfdSBarry Smith case 2: 15306e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; 154db4efbfdSBarry Smith break; 155db4efbfdSBarry Smith case 3: 15606e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; 157db4efbfdSBarry Smith break; 158db4efbfdSBarry Smith case 4: 15906e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; 160db4efbfdSBarry Smith break; 161db4efbfdSBarry Smith case 5: 16206e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; 163db4efbfdSBarry Smith break; 164db4efbfdSBarry Smith case 6: 16506e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; 166db4efbfdSBarry Smith break; 167db4efbfdSBarry Smith case 7: 16806e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; 169db4efbfdSBarry Smith break; 170db4efbfdSBarry Smith default: 17106e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 172db4efbfdSBarry Smith break; 173db4efbfdSBarry Smith } 174db4efbfdSBarry Smith } 175db4efbfdSBarry Smith PetscFunctionReturn(0); 176db4efbfdSBarry Smith } 177db4efbfdSBarry Smith 17883287d42SBarry Smith /* 17983287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 18083287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 18183287d42SBarry Smith NOT good code reuse. 18283287d42SBarry Smith */ 183c6db04a5SJed Brown #include <petscbt.h> 184c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 185c7272abeSHong Zhang 1864dd39f65SShri Abhyankar PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 18783287d42SBarry Smith { 18883287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 189c7272abeSHong Zhang PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2; 190ace3abfcSBarry Smith PetscBool row_identity,col_identity,both_identity; 19183287d42SBarry Smith IS isicol; 192c7272abeSHong Zhang const PetscInt *r,*ic; 193c7272abeSHong Zhang PetscInt i,*ai=a->i,*aj=a->j; 194c7272abeSHong Zhang PetscInt *bi,*bj,*ajtmp; 195c7272abeSHong Zhang PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 196c7272abeSHong Zhang PetscReal f; 197c7272abeSHong Zhang PetscInt nlnk,*lnk,k,**bi_ptr; 1980298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 199c7272abeSHong Zhang PetscBT lnkbt; 200ece542f9SHong Zhang PetscBool missing; 20183287d42SBarry Smith 20283287d42SBarry Smith PetscFunctionBegin; 20394bad497SJacob Faibussowitsch PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 2049566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A,&missing,&i)); 20594bad497SJacob Faibussowitsch PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 206ece542f9SHong Zhang 2076ba06ab7SHong Zhang if (bs>1) { /* check shifttype */ 208*aed4548fSBarry Smith PetscCheck(info->shifttype != MAT_SHIFT_NONZERO && info->shifttype != MAT_SHIFT_POSITIVE_DEFINITE,PETSC_COMM_SELF,PETSC_ERR_SUP,"Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 2096ba06ab7SHong Zhang } 2106ba06ab7SHong Zhang 2119566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 2129566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow,&r)); 2139566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol,&ic)); 21483287d42SBarry Smith 215bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 2169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n+1,&bi)); 2179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n+1,&bdiag)); 218a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 219b2b2dd24SShri Abhyankar 220b2b2dd24SShri Abhyankar /* linked list for storing column indices of the active row */ 221b2b2dd24SShri Abhyankar nlnk = n + 1; 2229566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt)); 223b2b2dd24SShri Abhyankar 2249566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im)); 225b2b2dd24SShri Abhyankar 226b2b2dd24SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 227b2b2dd24SShri Abhyankar f = info->fill; 2289566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 22926fbe8dcSKarl Rupp 230b2b2dd24SShri Abhyankar current_space = free_space; 231b2b2dd24SShri Abhyankar 232b2b2dd24SShri Abhyankar for (i=0; i<n; i++) { 233b2b2dd24SShri Abhyankar /* copy previous fill into linked list */ 234b2b2dd24SShri Abhyankar nzi = 0; 235b2b2dd24SShri Abhyankar nnz = ai[r[i]+1] - ai[r[i]]; 236b2b2dd24SShri Abhyankar ajtmp = aj + ai[r[i]]; 2379566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt)); 238b2b2dd24SShri Abhyankar nzi += nlnk; 239b2b2dd24SShri Abhyankar 240b2b2dd24SShri Abhyankar /* add pivot rows into linked list */ 241b2b2dd24SShri Abhyankar row = lnk[n]; 242b2b2dd24SShri Abhyankar while (row < i) { 243b2b2dd24SShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 244b2b2dd24SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 2459566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im)); 246b2b2dd24SShri Abhyankar nzi += nlnk; 247b2b2dd24SShri Abhyankar row = lnk[row]; 248b2b2dd24SShri Abhyankar } 249b2b2dd24SShri Abhyankar bi[i+1] = bi[i] + nzi; 250b2b2dd24SShri Abhyankar im[i] = nzi; 251b2b2dd24SShri Abhyankar 252b2b2dd24SShri Abhyankar /* mark bdiag */ 253b2b2dd24SShri Abhyankar nzbd = 0; 254b2b2dd24SShri Abhyankar nnz = nzi; 255b2b2dd24SShri Abhyankar k = lnk[n]; 256b2b2dd24SShri Abhyankar while (nnz-- && k < i) { 257b2b2dd24SShri Abhyankar nzbd++; 258b2b2dd24SShri Abhyankar k = lnk[k]; 259b2b2dd24SShri Abhyankar } 2602ce24eb6SHong Zhang bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 261b2b2dd24SShri Abhyankar 262b2b2dd24SShri Abhyankar /* if free space is not available, make more free space */ 263b2b2dd24SShri Abhyankar if (current_space->local_remaining<nzi) { 264f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */ 2659566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 266b2b2dd24SShri Abhyankar reallocs++; 267b2b2dd24SShri Abhyankar } 268b2b2dd24SShri Abhyankar 269b2b2dd24SShri Abhyankar /* copy data into free space, then initialize lnk */ 2709566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt)); 27126fbe8dcSKarl Rupp 272b2b2dd24SShri Abhyankar bi_ptr[i] = current_space->array; 273b2b2dd24SShri Abhyankar current_space->array += nzi; 274b2b2dd24SShri Abhyankar current_space->local_used += nzi; 275b2b2dd24SShri Abhyankar current_space->local_remaining -= nzi; 276b2b2dd24SShri Abhyankar } 277b2b2dd24SShri Abhyankar 2789566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow,&r)); 2799566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol,&ic)); 280b2b2dd24SShri Abhyankar 2819263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 2829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n]+1,&bj)); 2839566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag)); 2849566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk,lnkbt)); 2859566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr,im)); 286b2b2dd24SShri Abhyankar 287b2b2dd24SShri Abhyankar /* put together the new matrix */ 2889566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL)); 2899566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol)); 290b2b2dd24SShri Abhyankar b = (Mat_SeqBAIJ*)(B)->data; 29126fbe8dcSKarl Rupp 292b2b2dd24SShri Abhyankar b->free_a = PETSC_TRUE; 293b2b2dd24SShri Abhyankar b->free_ij = PETSC_TRUE; 294b2b2dd24SShri Abhyankar b->singlemalloc = PETSC_FALSE; 29526fbe8dcSKarl Rupp 2969566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((bdiag[0]+1)*bs2,&b->a)); 297b2b2dd24SShri Abhyankar b->j = bj; 298b2b2dd24SShri Abhyankar b->i = bi; 299b2b2dd24SShri Abhyankar b->diag = bdiag; 300b2b2dd24SShri Abhyankar b->free_diag = PETSC_TRUE; 301f4259b30SLisandro Dalcin b->ilen = NULL; 302f4259b30SLisandro Dalcin b->imax = NULL; 303b2b2dd24SShri Abhyankar b->row = isrow; 304b2b2dd24SShri Abhyankar b->col = iscol; 305b2b2dd24SShri Abhyankar b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 30626fbe8dcSKarl Rupp 3079566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 3089566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 309b2b2dd24SShri Abhyankar b->icol = isicol; 3109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs*n+bs,&b->solve_work)); 3119566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2))); 312b2b2dd24SShri Abhyankar 313b2b2dd24SShri Abhyankar b->maxnz = b->nz = bdiag[0]+1; 31426fbe8dcSKarl Rupp 315d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 316b2b2dd24SShri Abhyankar B->info.factor_mallocs = reallocs; 317b2b2dd24SShri Abhyankar B->info.fill_ratio_given = f; 318b2b2dd24SShri Abhyankar 319b2b2dd24SShri Abhyankar if (ai[n] != 0) { 320b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]); 321b2b2dd24SShri Abhyankar } else { 322b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = 0.0; 323b2b2dd24SShri Abhyankar } 3249263d837SHong Zhang #if defined(PETSC_USE_INFO) 3259263d837SHong Zhang if (ai[n] != 0) { 3269263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 3279566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 3289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 3299566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af)); 3309566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"for best performance.\n")); 3319263d837SHong Zhang } else { 3329566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Empty matrix\n")); 3339263d837SHong Zhang } 3349263d837SHong Zhang #endif 335b2b2dd24SShri Abhyankar 3369566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow,&row_identity)); 3379566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol,&col_identity)); 33826fbe8dcSKarl Rupp 339ace3abfcSBarry Smith both_identity = (PetscBool) (row_identity && col_identity); 34026fbe8dcSKarl Rupp 3419566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization(B,both_identity)); 342b2b2dd24SShri Abhyankar PetscFunctionReturn(0); 343b2b2dd24SShri Abhyankar } 344b2b2dd24SShri Abhyankar 34506e38f1dSHong Zhang PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info) 346faca2338SShri Abhyankar { 347faca2338SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data,*b; 348faca2338SShri Abhyankar PetscInt n =a->mbs,bs = A->rmap->bs,bs2=a->bs2; 349ace3abfcSBarry Smith PetscBool row_identity,col_identity,both_identity; 350faca2338SShri Abhyankar IS isicol; 351faca2338SShri Abhyankar const PetscInt *r,*ic; 352faca2338SShri Abhyankar PetscInt i,*ai=a->i,*aj=a->j; 353faca2338SShri Abhyankar PetscInt *bi,*bj,*ajtmp; 354faca2338SShri Abhyankar PetscInt *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im; 355faca2338SShri Abhyankar PetscReal f; 356faca2338SShri Abhyankar PetscInt nlnk,*lnk,k,**bi_ptr; 3570298fd71SBarry Smith PetscFreeSpaceList free_space=NULL,current_space=NULL; 358faca2338SShri Abhyankar PetscBT lnkbt; 359ece542f9SHong Zhang PetscBool missing; 360faca2338SShri Abhyankar 361faca2338SShri Abhyankar PetscFunctionBegin; 36294bad497SJacob Faibussowitsch PetscCheck(A->rmap->N == A->cmap->N,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"matrix must be square"); 3639566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A,&missing,&i)); 36494bad497SJacob Faibussowitsch PetscCheck(!missing,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Matrix is missing diagonal entry %" PetscInt_FMT,i); 365ece542f9SHong Zhang 3669566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol,PETSC_DECIDE,&isicol)); 3679566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow,&r)); 3689566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol,&ic)); 369faca2338SShri Abhyankar 370bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 3719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n+1,&bi)); 3729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n+1,&bdiag)); 373bc74c81fSJed Brown 374a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 375c7272abeSHong Zhang 376c7272abeSHong Zhang /* linked list for storing column indices of the active row */ 377c7272abeSHong Zhang nlnk = n + 1; 3789566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n,n,nlnk,lnk,lnkbt)); 379c7272abeSHong Zhang 3809566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n+1,&bi_ptr,n+1,&im)); 381c7272abeSHong Zhang 382c7272abeSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 383c7272abeSHong Zhang f = info->fill; 3849566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space)); 385c7272abeSHong Zhang current_space = free_space; 38683287d42SBarry Smith 38783287d42SBarry Smith for (i=0; i<n; i++) { 388c7272abeSHong Zhang /* copy previous fill into linked list */ 38983287d42SBarry Smith nzi = 0; 390c7272abeSHong Zhang nnz = ai[r[i]+1] - ai[r[i]]; 391c7272abeSHong Zhang ajtmp = aj + ai[r[i]]; 3929566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt)); 393c7272abeSHong Zhang nzi += nlnk; 394c7272abeSHong Zhang 395c7272abeSHong Zhang /* add pivot rows into linked list */ 396c7272abeSHong Zhang row = lnk[n]; 397c7272abeSHong Zhang while (row < i) { 398c7272abeSHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 399c7272abeSHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 4009566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im)); 401c7272abeSHong Zhang nzi += nlnk; 402c7272abeSHong Zhang row = lnk[row]; 40383287d42SBarry Smith } 404c7272abeSHong Zhang bi[i+1] = bi[i] + nzi; 405c7272abeSHong Zhang im[i] = nzi; 406c7272abeSHong Zhang 407c7272abeSHong Zhang /* mark bdiag */ 408c7272abeSHong Zhang nzbd = 0; 409c7272abeSHong Zhang nnz = nzi; 410c7272abeSHong Zhang k = lnk[n]; 411c7272abeSHong Zhang while (nnz-- && k < i) { 412c7272abeSHong Zhang nzbd++; 413c7272abeSHong Zhang k = lnk[k]; 414c7272abeSHong Zhang } 415c7272abeSHong Zhang bdiag[i] = bi[i] + nzbd; 416c7272abeSHong Zhang 417c7272abeSHong Zhang /* if free space is not available, make more free space */ 418c7272abeSHong Zhang if (current_space->local_remaining<nzi) { 419f91af8c7SBarry Smith nnz = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */ 4209566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz,¤t_space)); 421c7272abeSHong Zhang reallocs++; 42283287d42SBarry Smith } 42383287d42SBarry Smith 424c7272abeSHong Zhang /* copy data into free space, then initialize lnk */ 4259566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt)); 42626fbe8dcSKarl Rupp 427c7272abeSHong Zhang bi_ptr[i] = current_space->array; 428c7272abeSHong Zhang current_space->array += nzi; 429c7272abeSHong Zhang current_space->local_used += nzi; 430c7272abeSHong Zhang current_space->local_remaining -= nzi; 431c7272abeSHong Zhang } 4326cf91177SBarry Smith #if defined(PETSC_USE_INFO) 43383287d42SBarry Smith if (ai[n] != 0) { 434c7272abeSHong Zhang PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]); 4359566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af)); 4369566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af)); 4379566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af)); 4389566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"for best performance.\n")); 43983287d42SBarry Smith } else { 4409566063dSJacob Faibussowitsch PetscCall(PetscInfo(A,"Empty matrix\n")); 44183287d42SBarry Smith } 44263ba0a88SBarry Smith #endif 44383287d42SBarry Smith 4449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow,&r)); 4459566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol,&ic)); 44683287d42SBarry Smith 447c7272abeSHong Zhang /* destroy list of free space and other temporary array(s) */ 4489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n]+1,&bj)); 4499566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space,bj)); 4509566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk,lnkbt)); 4519566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr,im)); 45283287d42SBarry Smith 45383287d42SBarry Smith /* put together the new matrix */ 4549566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL)); 4559566063dSJacob Faibussowitsch PetscCall(PetscLogObjectParent((PetscObject)B,(PetscObject)isicol)); 456719d5645SBarry Smith b = (Mat_SeqBAIJ*)(B)->data; 45726fbe8dcSKarl Rupp 458e6b907acSBarry Smith b->free_a = PETSC_TRUE; 459e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 460c7272abeSHong Zhang b->singlemalloc = PETSC_FALSE; 46126fbe8dcSKarl Rupp 4629566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((bi[n]+1)*bs2,&b->a)); 463c7272abeSHong Zhang b->j = bj; 464c7272abeSHong Zhang b->i = bi; 465c7272abeSHong Zhang b->diag = bdiag; 4667f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 467f4259b30SLisandro Dalcin b->ilen = NULL; 468f4259b30SLisandro Dalcin b->imax = NULL; 46983287d42SBarry Smith b->row = isrow; 47083287d42SBarry Smith b->col = iscol; 471d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 47226fbe8dcSKarl Rupp 4739566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 4749566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 47583287d42SBarry Smith b->icol = isicol; 47626fbe8dcSKarl Rupp 4779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs*n+bs,&b->solve_work)); 4789566063dSJacob Faibussowitsch PetscCall(PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2))); 47983287d42SBarry Smith 480c7272abeSHong Zhang b->maxnz = b->nz = bi[n]; 48126fbe8dcSKarl Rupp 482d5f3da31SBarry Smith (B)->factortype = MAT_FACTOR_LU; 483719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 484719d5645SBarry Smith (B)->info.fill_ratio_given = f; 485c7272abeSHong Zhang 48683287d42SBarry Smith if (ai[n] != 0) { 487c7272abeSHong Zhang (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]); 48883287d42SBarry Smith } else { 489719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 49083287d42SBarry Smith } 491c7272abeSHong Zhang 4929566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow,&row_identity)); 4939566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol,&col_identity)); 49426fbe8dcSKarl Rupp 495ace3abfcSBarry Smith both_identity = (PetscBool) (row_identity && col_identity); 49626fbe8dcSKarl Rupp 4979566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B,both_identity)); 49883287d42SBarry Smith PetscFunctionReturn(0); 49983287d42SBarry Smith } 500