183287d42SBarry Smith /* 283287d42SBarry Smith Factorization code for BAIJ format. 383287d42SBarry Smith */ 4c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> 5af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 683287d42SBarry Smith 7db4efbfdSBarry Smith /* 8db4efbfdSBarry Smith This is used to set the numeric factorization for both LU and ILU symbolic factorization 9db4efbfdSBarry Smith */ 10d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural) 11d71ae5a4SJacob Faibussowitsch { 12ae3d28f0SHong Zhang PetscFunctionBegin; 136929473cSShri Abhyankar if (natural) { 146929473cSShri Abhyankar switch (fact->rmap->bs) { 15d71ae5a4SJacob Faibussowitsch case 1: 16d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 17d71ae5a4SJacob Faibussowitsch break; 18d71ae5a4SJacob Faibussowitsch case 2: 19d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 20d71ae5a4SJacob Faibussowitsch break; 21d71ae5a4SJacob Faibussowitsch case 3: 22d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 23d71ae5a4SJacob Faibussowitsch break; 24d71ae5a4SJacob Faibussowitsch case 4: 25d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 26d71ae5a4SJacob Faibussowitsch break; 27d71ae5a4SJacob Faibussowitsch case 5: 28d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 29d71ae5a4SJacob Faibussowitsch break; 30d71ae5a4SJacob Faibussowitsch case 6: 31d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 32d71ae5a4SJacob Faibussowitsch break; 33d71ae5a4SJacob Faibussowitsch case 7: 34d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 35d71ae5a4SJacob Faibussowitsch break; 3696e086a2SDaniel Kokron case 9: 375f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3896e086a2SDaniel Kokron fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 39aee5c371SBarry Smith #else 40aee5c371SBarry Smith fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 41aee5c371SBarry Smith #endif 4296e086a2SDaniel Kokron break; 43d71ae5a4SJacob Faibussowitsch case 15: 44d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 45d71ae5a4SJacob Faibussowitsch break; 46d71ae5a4SJacob Faibussowitsch default: 47d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 48d71ae5a4SJacob Faibussowitsch break; 496929473cSShri Abhyankar } 506ba06ab7SHong Zhang } else { 51ae3d28f0SHong Zhang switch (fact->rmap->bs) { 52d71ae5a4SJacob Faibussowitsch case 1: 53d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 54d71ae5a4SJacob Faibussowitsch break; 55d71ae5a4SJacob Faibussowitsch case 2: 56d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 57d71ae5a4SJacob Faibussowitsch break; 58d71ae5a4SJacob Faibussowitsch case 3: 59d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 60d71ae5a4SJacob Faibussowitsch break; 61d71ae5a4SJacob Faibussowitsch case 4: 62d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 63d71ae5a4SJacob Faibussowitsch break; 64d71ae5a4SJacob Faibussowitsch case 5: 65d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 66d71ae5a4SJacob Faibussowitsch break; 67d71ae5a4SJacob Faibussowitsch case 6: 68d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 69d71ae5a4SJacob Faibussowitsch break; 70d71ae5a4SJacob Faibussowitsch case 7: 71d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 72d71ae5a4SJacob Faibussowitsch break; 73d71ae5a4SJacob Faibussowitsch default: 74d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 75d71ae5a4SJacob Faibussowitsch break; 76ae3d28f0SHong Zhang } 776929473cSShri Abhyankar } 783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 79ae3d28f0SHong Zhang } 80ae3d28f0SHong Zhang 81d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural) 82d71ae5a4SJacob Faibussowitsch { 83db4efbfdSBarry Smith PetscFunctionBegin; 84db4efbfdSBarry Smith if (natural) { 85db4efbfdSBarry Smith switch (inA->rmap->bs) { 86d71ae5a4SJacob Faibussowitsch case 1: 87d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 88d71ae5a4SJacob Faibussowitsch break; 89d71ae5a4SJacob Faibussowitsch case 2: 90d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 91d71ae5a4SJacob Faibussowitsch break; 92d71ae5a4SJacob Faibussowitsch case 3: 93d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 94d71ae5a4SJacob Faibussowitsch break; 95db4efbfdSBarry Smith case 4: 9606e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 97db4efbfdSBarry Smith break; 98d71ae5a4SJacob Faibussowitsch case 5: 99d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 100d71ae5a4SJacob Faibussowitsch break; 101d71ae5a4SJacob Faibussowitsch case 6: 102d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 103d71ae5a4SJacob Faibussowitsch break; 104d71ae5a4SJacob Faibussowitsch case 7: 105d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 106d71ae5a4SJacob Faibussowitsch break; 107d71ae5a4SJacob Faibussowitsch default: 108d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 109d71ae5a4SJacob Faibussowitsch break; 110db4efbfdSBarry Smith } 111db4efbfdSBarry Smith } else { 112db4efbfdSBarry Smith switch (inA->rmap->bs) { 113d71ae5a4SJacob Faibussowitsch case 1: 114d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 115d71ae5a4SJacob Faibussowitsch break; 116d71ae5a4SJacob Faibussowitsch case 2: 117d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; 118d71ae5a4SJacob Faibussowitsch break; 119d71ae5a4SJacob Faibussowitsch case 3: 120d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; 121d71ae5a4SJacob Faibussowitsch break; 122d71ae5a4SJacob Faibussowitsch case 4: 123d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; 124d71ae5a4SJacob Faibussowitsch break; 125d71ae5a4SJacob Faibussowitsch case 5: 126d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; 127d71ae5a4SJacob Faibussowitsch break; 128d71ae5a4SJacob Faibussowitsch case 6: 129d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; 130d71ae5a4SJacob Faibussowitsch break; 131d71ae5a4SJacob Faibussowitsch case 7: 132d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; 133d71ae5a4SJacob Faibussowitsch break; 134d71ae5a4SJacob Faibussowitsch default: 135d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 136d71ae5a4SJacob Faibussowitsch break; 137db4efbfdSBarry Smith } 138db4efbfdSBarry Smith } 1393ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 140db4efbfdSBarry Smith } 141db4efbfdSBarry Smith 14283287d42SBarry Smith /* 14383287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 14483287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 14583287d42SBarry Smith NOT good code reuse. 14683287d42SBarry Smith */ 147c6db04a5SJed Brown #include <petscbt.h> 148c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 149c7272abeSHong Zhang 150d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 151d71ae5a4SJacob Faibussowitsch { 15283287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 153c7272abeSHong Zhang PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 154ace3abfcSBarry Smith PetscBool row_identity, col_identity, both_identity; 15583287d42SBarry Smith IS isicol; 156c7272abeSHong Zhang const PetscInt *r, *ic; 157c7272abeSHong Zhang PetscInt i, *ai = a->i, *aj = a->j; 158c7272abeSHong Zhang PetscInt *bi, *bj, *ajtmp; 159c7272abeSHong Zhang PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 160c7272abeSHong Zhang PetscReal f; 161c7272abeSHong Zhang PetscInt nlnk, *lnk, k, **bi_ptr; 1620298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 163c7272abeSHong Zhang PetscBT lnkbt; 164ece542f9SHong Zhang PetscBool missing; 16583287d42SBarry Smith 16683287d42SBarry Smith PetscFunctionBegin; 16794bad497SJacob Faibussowitsch PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 1689566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 16994bad497SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 170ece542f9SHong Zhang 1716ba06ab7SHong Zhang if (bs > 1) { /* check shifttype */ 17213bcc0bdSJacob Faibussowitsch PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 1736ba06ab7SHong Zhang } 1746ba06ab7SHong Zhang 1759566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 1769566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 1779566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 17883287d42SBarry Smith 179bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 1809566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 1819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 182a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 183b2b2dd24SShri Abhyankar 184b2b2dd24SShri Abhyankar /* linked list for storing column indices of the active row */ 185b2b2dd24SShri Abhyankar nlnk = n + 1; 1869566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 187b2b2dd24SShri Abhyankar 1889566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 189b2b2dd24SShri Abhyankar 190b2b2dd24SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 191b2b2dd24SShri Abhyankar f = info->fill; 1929566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 19326fbe8dcSKarl Rupp 194b2b2dd24SShri Abhyankar current_space = free_space; 195b2b2dd24SShri Abhyankar 196b2b2dd24SShri Abhyankar for (i = 0; i < n; i++) { 197b2b2dd24SShri Abhyankar /* copy previous fill into linked list */ 198b2b2dd24SShri Abhyankar nzi = 0; 199b2b2dd24SShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 200b2b2dd24SShri Abhyankar ajtmp = aj + ai[r[i]]; 2019566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 202b2b2dd24SShri Abhyankar nzi += nlnk; 203b2b2dd24SShri Abhyankar 204b2b2dd24SShri Abhyankar /* add pivot rows into linked list */ 205b2b2dd24SShri Abhyankar row = lnk[n]; 206b2b2dd24SShri Abhyankar while (row < i) { 207b2b2dd24SShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 208b2b2dd24SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 2099566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 210b2b2dd24SShri Abhyankar nzi += nlnk; 211b2b2dd24SShri Abhyankar row = lnk[row]; 212b2b2dd24SShri Abhyankar } 213b2b2dd24SShri Abhyankar bi[i + 1] = bi[i] + nzi; 214b2b2dd24SShri Abhyankar im[i] = nzi; 215b2b2dd24SShri Abhyankar 216b2b2dd24SShri Abhyankar /* mark bdiag */ 217b2b2dd24SShri Abhyankar nzbd = 0; 218b2b2dd24SShri Abhyankar nnz = nzi; 219b2b2dd24SShri Abhyankar k = lnk[n]; 220b2b2dd24SShri Abhyankar while (nnz-- && k < i) { 221b2b2dd24SShri Abhyankar nzbd++; 222b2b2dd24SShri Abhyankar k = lnk[k]; 223b2b2dd24SShri Abhyankar } 2242ce24eb6SHong Zhang bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 225b2b2dd24SShri Abhyankar 226b2b2dd24SShri Abhyankar /* if free space is not available, make more free space */ 227b2b2dd24SShri Abhyankar if (current_space->local_remaining < nzi) { 228f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */ 2299566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 230b2b2dd24SShri Abhyankar reallocs++; 231b2b2dd24SShri Abhyankar } 232b2b2dd24SShri Abhyankar 233b2b2dd24SShri Abhyankar /* copy data into free space, then initialize lnk */ 2349566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 23526fbe8dcSKarl Rupp 236b2b2dd24SShri Abhyankar bi_ptr[i] = current_space->array; 237b2b2dd24SShri Abhyankar current_space->array += nzi; 238b2b2dd24SShri Abhyankar current_space->local_used += nzi; 239b2b2dd24SShri Abhyankar current_space->local_remaining -= nzi; 240b2b2dd24SShri Abhyankar } 241b2b2dd24SShri Abhyankar 2429566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 2439566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 244b2b2dd24SShri Abhyankar 2459263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 246*84648c2dSPierre Jolivet PetscCall(PetscMalloc1(bi[n], &bj)); 2479566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 2489566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 2499566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 250b2b2dd24SShri Abhyankar 251b2b2dd24SShri Abhyankar /* put together the new matrix */ 2529566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 25357508eceSPierre Jolivet b = (Mat_SeqBAIJ *)B->data; 25426fbe8dcSKarl Rupp 255b2b2dd24SShri Abhyankar b->free_ij = PETSC_TRUE; 2569f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a)); 2579f0612e4SBarry Smith b->free_a = PETSC_TRUE; 258b2b2dd24SShri Abhyankar b->j = bj; 259b2b2dd24SShri Abhyankar b->i = bi; 260b2b2dd24SShri Abhyankar b->diag = bdiag; 261b2b2dd24SShri Abhyankar b->free_diag = PETSC_TRUE; 262f4259b30SLisandro Dalcin b->ilen = NULL; 263f4259b30SLisandro Dalcin b->imax = NULL; 264b2b2dd24SShri Abhyankar b->row = isrow; 265b2b2dd24SShri Abhyankar b->col = iscol; 266b2b2dd24SShri Abhyankar b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 26726fbe8dcSKarl Rupp 2689566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 2699566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 270b2b2dd24SShri Abhyankar b->icol = isicol; 2719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 272b2b2dd24SShri Abhyankar 273b2b2dd24SShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 27426fbe8dcSKarl Rupp 275d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 276b2b2dd24SShri Abhyankar B->info.factor_mallocs = reallocs; 277b2b2dd24SShri Abhyankar B->info.fill_ratio_given = f; 278b2b2dd24SShri Abhyankar 279b2b2dd24SShri Abhyankar if (ai[n] != 0) { 280b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 281b2b2dd24SShri Abhyankar } else { 282b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = 0.0; 283b2b2dd24SShri Abhyankar } 2849263d837SHong Zhang #if defined(PETSC_USE_INFO) 2859263d837SHong Zhang if (ai[n] != 0) { 2869263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 2879566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 2889566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 2899566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 2909566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 2919263d837SHong Zhang } else { 2929566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 2939263d837SHong Zhang } 2949263d837SHong Zhang #endif 295b2b2dd24SShri Abhyankar 2969566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 2979566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 29826fbe8dcSKarl Rupp 299ace3abfcSBarry Smith both_identity = (PetscBool)(row_identity && col_identity); 30026fbe8dcSKarl Rupp 3019566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity)); 3023ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 303b2b2dd24SShri Abhyankar } 304b2b2dd24SShri Abhyankar 305ff6a9541SJacob Faibussowitsch #if 0 306ff6a9541SJacob Faibussowitsch // unused 307ff6a9541SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 308d71ae5a4SJacob Faibussowitsch { 309faca2338SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 310faca2338SShri Abhyankar PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 311ace3abfcSBarry Smith PetscBool row_identity, col_identity, both_identity; 312faca2338SShri Abhyankar IS isicol; 313faca2338SShri Abhyankar const PetscInt *r, *ic; 314faca2338SShri Abhyankar PetscInt i, *ai = a->i, *aj = a->j; 315faca2338SShri Abhyankar PetscInt *bi, *bj, *ajtmp; 316faca2338SShri Abhyankar PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 317faca2338SShri Abhyankar PetscReal f; 318faca2338SShri Abhyankar PetscInt nlnk, *lnk, k, **bi_ptr; 3190298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 320faca2338SShri Abhyankar PetscBT lnkbt; 321ece542f9SHong Zhang PetscBool missing; 322faca2338SShri Abhyankar 323faca2338SShri Abhyankar PetscFunctionBegin; 32494bad497SJacob Faibussowitsch PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 3259566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 32694bad497SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 327ece542f9SHong Zhang 3289566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3299566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 3309566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 331faca2338SShri Abhyankar 332bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 3339566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 3349566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 335bc74c81fSJed Brown 336a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 337c7272abeSHong Zhang 338c7272abeSHong Zhang /* linked list for storing column indices of the active row */ 339c7272abeSHong Zhang nlnk = n + 1; 3409566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 341c7272abeSHong Zhang 3429566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 343c7272abeSHong Zhang 344c7272abeSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 345c7272abeSHong Zhang f = info->fill; 3469566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 347c7272abeSHong Zhang current_space = free_space; 34883287d42SBarry Smith 34983287d42SBarry Smith for (i = 0; i < n; i++) { 350c7272abeSHong Zhang /* copy previous fill into linked list */ 35183287d42SBarry Smith nzi = 0; 352c7272abeSHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 353c7272abeSHong Zhang ajtmp = aj + ai[r[i]]; 3549566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 355c7272abeSHong Zhang nzi += nlnk; 356c7272abeSHong Zhang 357c7272abeSHong Zhang /* add pivot rows into linked list */ 358c7272abeSHong Zhang row = lnk[n]; 359c7272abeSHong Zhang while (row < i) { 360c7272abeSHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 361c7272abeSHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 3629566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 363c7272abeSHong Zhang nzi += nlnk; 364c7272abeSHong Zhang row = lnk[row]; 36583287d42SBarry Smith } 366c7272abeSHong Zhang bi[i + 1] = bi[i] + nzi; 367c7272abeSHong Zhang im[i] = nzi; 368c7272abeSHong Zhang 369c7272abeSHong Zhang /* mark bdiag */ 370c7272abeSHong Zhang nzbd = 0; 371c7272abeSHong Zhang nnz = nzi; 372c7272abeSHong Zhang k = lnk[n]; 373c7272abeSHong Zhang while (nnz-- && k < i) { 374c7272abeSHong Zhang nzbd++; 375c7272abeSHong Zhang k = lnk[k]; 376c7272abeSHong Zhang } 377c7272abeSHong Zhang bdiag[i] = bi[i] + nzbd; 378c7272abeSHong Zhang 379c7272abeSHong Zhang /* if free space is not available, make more free space */ 380c7272abeSHong Zhang if (current_space->local_remaining < nzi) { 381f91af8c7SBarry Smith nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 3829566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 383c7272abeSHong Zhang reallocs++; 38483287d42SBarry Smith } 38583287d42SBarry Smith 386c7272abeSHong Zhang /* copy data into free space, then initialize lnk */ 3879566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 38826fbe8dcSKarl Rupp 389c7272abeSHong Zhang bi_ptr[i] = current_space->array; 390c7272abeSHong Zhang current_space->array += nzi; 391c7272abeSHong Zhang current_space->local_used += nzi; 392c7272abeSHong Zhang current_space->local_remaining -= nzi; 393c7272abeSHong Zhang } 3946cf91177SBarry Smith #if defined(PETSC_USE_INFO) 39583287d42SBarry Smith if (ai[n] != 0) { 396c7272abeSHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 3979566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 3989566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3999566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 4009566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 40183287d42SBarry Smith } else { 4029566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 40383287d42SBarry Smith } 40463ba0a88SBarry Smith #endif 40583287d42SBarry Smith 4069566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 4079566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 40883287d42SBarry Smith 409c7272abeSHong Zhang /* destroy list of free space and other temporary array(s) */ 4109566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 4119566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4129566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 4139566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 41483287d42SBarry Smith 41583287d42SBarry Smith /* put together the new matrix */ 4169566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 41757508eceSPierre Jolivet b = (Mat_SeqBAIJ *)B->data; 418e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 4199f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray((bi[n] + 1) * bs2,,sizeof(PetscScalar),(void **)&b->a)); 4209f0612e4SBarry Smith b->free_a = PETSC_TRUE; 421c7272abeSHong Zhang b->j = bj; 422c7272abeSHong Zhang b->i = bi; 423c7272abeSHong Zhang b->diag = bdiag; 4247f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 425f4259b30SLisandro Dalcin b->ilen = NULL; 426f4259b30SLisandro Dalcin b->imax = NULL; 42783287d42SBarry Smith b->row = isrow; 42883287d42SBarry Smith b->col = iscol; 429d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 43026fbe8dcSKarl Rupp 4319566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 4329566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 43383287d42SBarry Smith b->icol = isicol; 43426fbe8dcSKarl Rupp 4359566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 43683287d42SBarry Smith 437c7272abeSHong Zhang b->maxnz = b->nz = bi[n]; 43826fbe8dcSKarl Rupp 43957508eceSPierre Jolivet B->factortype = MAT_FACTOR_LU; 44057508eceSPierre Jolivet B->info.factor_mallocs = reallocs; 44157508eceSPierre Jolivet B->info.fill_ratio_given = f; 442c7272abeSHong Zhang 44383287d42SBarry Smith if (ai[n] != 0) { 44457508eceSPierre Jolivet B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 44583287d42SBarry Smith } else { 44657508eceSPierre Jolivet B->info.fill_ratio_needed = 0.0; 44783287d42SBarry Smith } 448c7272abeSHong Zhang 4499566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 4509566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 45126fbe8dcSKarl Rupp 452ace3abfcSBarry Smith both_identity = (PetscBool)(row_identity && col_identity); 45326fbe8dcSKarl Rupp 4549566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity)); 4553ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 45683287d42SBarry Smith } 457ff6a9541SJacob Faibussowitsch #endif 458