183287d42SBarry Smith /* 283287d42SBarry Smith Factorization code for BAIJ format. 383287d42SBarry Smith */ 4c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> 5af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 683287d42SBarry Smith 7db4efbfdSBarry Smith /* 8db4efbfdSBarry Smith This is used to set the numeric factorization for both LU and ILU symbolic factorization 9db4efbfdSBarry Smith */ 10d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural) 11d71ae5a4SJacob Faibussowitsch { 12ae3d28f0SHong Zhang PetscFunctionBegin; 136929473cSShri Abhyankar if (natural) { 146929473cSShri Abhyankar switch (fact->rmap->bs) { 15d71ae5a4SJacob Faibussowitsch case 1: 16d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 17d71ae5a4SJacob Faibussowitsch break; 18d71ae5a4SJacob Faibussowitsch case 2: 19d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 20d71ae5a4SJacob Faibussowitsch break; 21d71ae5a4SJacob Faibussowitsch case 3: 22d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 23d71ae5a4SJacob Faibussowitsch break; 24d71ae5a4SJacob Faibussowitsch case 4: 25d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 26d71ae5a4SJacob Faibussowitsch break; 27d71ae5a4SJacob Faibussowitsch case 5: 28d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 29d71ae5a4SJacob Faibussowitsch break; 30d71ae5a4SJacob Faibussowitsch case 6: 31d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 32d71ae5a4SJacob Faibussowitsch break; 33d71ae5a4SJacob Faibussowitsch case 7: 34d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 35d71ae5a4SJacob Faibussowitsch break; 3696e086a2SDaniel Kokron case 9: 375f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3896e086a2SDaniel Kokron fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 39aee5c371SBarry Smith #else 40aee5c371SBarry Smith fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 41aee5c371SBarry Smith #endif 4296e086a2SDaniel Kokron break; 43d71ae5a4SJacob Faibussowitsch case 15: 44d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 45d71ae5a4SJacob Faibussowitsch break; 46d71ae5a4SJacob Faibussowitsch default: 47d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 48d71ae5a4SJacob Faibussowitsch break; 496929473cSShri Abhyankar } 506ba06ab7SHong Zhang } else { 51ae3d28f0SHong Zhang switch (fact->rmap->bs) { 52d71ae5a4SJacob Faibussowitsch case 1: 53d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 54d71ae5a4SJacob Faibussowitsch break; 55d71ae5a4SJacob Faibussowitsch case 2: 56d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 57d71ae5a4SJacob Faibussowitsch break; 58d71ae5a4SJacob Faibussowitsch case 3: 59d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 60d71ae5a4SJacob Faibussowitsch break; 61d71ae5a4SJacob Faibussowitsch case 4: 62d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 63d71ae5a4SJacob Faibussowitsch break; 64d71ae5a4SJacob Faibussowitsch case 5: 65d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 66d71ae5a4SJacob Faibussowitsch break; 67d71ae5a4SJacob Faibussowitsch case 6: 68d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 69d71ae5a4SJacob Faibussowitsch break; 70d71ae5a4SJacob Faibussowitsch case 7: 71d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 72d71ae5a4SJacob Faibussowitsch break; 73d71ae5a4SJacob Faibussowitsch default: 74d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 75d71ae5a4SJacob Faibussowitsch break; 76ae3d28f0SHong Zhang } 776929473cSShri Abhyankar } 783ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 79ae3d28f0SHong Zhang } 80ae3d28f0SHong Zhang 81d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural) 82d71ae5a4SJacob Faibussowitsch { 83db4efbfdSBarry Smith PetscFunctionBegin; 84db4efbfdSBarry Smith if (natural) { 85db4efbfdSBarry Smith switch (inA->rmap->bs) { 86d71ae5a4SJacob Faibussowitsch case 1: 87d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 88d71ae5a4SJacob Faibussowitsch break; 89d71ae5a4SJacob Faibussowitsch case 2: 90d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 91d71ae5a4SJacob Faibussowitsch break; 92d71ae5a4SJacob Faibussowitsch case 3: 93d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 94d71ae5a4SJacob Faibussowitsch break; 95db4efbfdSBarry Smith case 4: 96ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 97db4efbfdSBarry Smith { 98ace3abfcSBarry Smith PetscBool sse_enabled_local; 999566063dSJacob Faibussowitsch PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL)); 100db4efbfdSBarry Smith if (sse_enabled_local) { 101db4efbfdSBarry Smith #if defined(PETSC_HAVE_SSE) 102db4efbfdSBarry Smith int i, *AJ = a->j, nz = a->nz, n = a->mbs; 103db4efbfdSBarry Smith if (n == (unsigned short)n) { 104db4efbfdSBarry Smith unsigned short *aj = (unsigned short *)AJ; 10526fbe8dcSKarl Rupp for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i]; 10626fbe8dcSKarl Rupp 107db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 108db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 10926fbe8dcSKarl Rupp 1109566063dSJacob Faibussowitsch PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n")); 111db4efbfdSBarry Smith } else { 112db4efbfdSBarry Smith /* Scale the column indices for easier indexing in MatSolve. */ 113db4efbfdSBarry Smith /* for (i=0;i<nz;i++) { */ 114db4efbfdSBarry Smith /* AJ[i] = AJ[i]*4; */ 115db4efbfdSBarry Smith /* } */ 116db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 117db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 11826fbe8dcSKarl Rupp 1199566063dSJacob Faibussowitsch PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n")); 120db4efbfdSBarry Smith } 121db4efbfdSBarry Smith #else 122db4efbfdSBarry Smith /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 123e32f2f54SBarry Smith SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable"); 124db4efbfdSBarry Smith #endif 125db4efbfdSBarry Smith } else { 12606e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 127db4efbfdSBarry Smith } 128db4efbfdSBarry Smith } 129db4efbfdSBarry Smith #else 13006e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 131db4efbfdSBarry Smith #endif 132db4efbfdSBarry Smith break; 133d71ae5a4SJacob Faibussowitsch case 5: 134d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 135d71ae5a4SJacob Faibussowitsch break; 136d71ae5a4SJacob Faibussowitsch case 6: 137d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 138d71ae5a4SJacob Faibussowitsch break; 139d71ae5a4SJacob Faibussowitsch case 7: 140d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 141d71ae5a4SJacob Faibussowitsch break; 142d71ae5a4SJacob Faibussowitsch default: 143d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 144d71ae5a4SJacob Faibussowitsch break; 145db4efbfdSBarry Smith } 146db4efbfdSBarry Smith } else { 147db4efbfdSBarry Smith switch (inA->rmap->bs) { 148d71ae5a4SJacob Faibussowitsch case 1: 149d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 150d71ae5a4SJacob Faibussowitsch break; 151d71ae5a4SJacob Faibussowitsch case 2: 152d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; 153d71ae5a4SJacob Faibussowitsch break; 154d71ae5a4SJacob Faibussowitsch case 3: 155d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; 156d71ae5a4SJacob Faibussowitsch break; 157d71ae5a4SJacob Faibussowitsch case 4: 158d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; 159d71ae5a4SJacob Faibussowitsch break; 160d71ae5a4SJacob Faibussowitsch case 5: 161d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; 162d71ae5a4SJacob Faibussowitsch break; 163d71ae5a4SJacob Faibussowitsch case 6: 164d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; 165d71ae5a4SJacob Faibussowitsch break; 166d71ae5a4SJacob Faibussowitsch case 7: 167d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; 168d71ae5a4SJacob Faibussowitsch break; 169d71ae5a4SJacob Faibussowitsch default: 170d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 171d71ae5a4SJacob Faibussowitsch break; 172db4efbfdSBarry Smith } 173db4efbfdSBarry Smith } 1743ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 175db4efbfdSBarry Smith } 176db4efbfdSBarry Smith 17783287d42SBarry Smith /* 17883287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 17983287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 18083287d42SBarry Smith NOT good code reuse. 18183287d42SBarry Smith */ 182c6db04a5SJed Brown #include <petscbt.h> 183c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 184c7272abeSHong Zhang 185d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 186d71ae5a4SJacob Faibussowitsch { 18783287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 188c7272abeSHong Zhang PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 189ace3abfcSBarry Smith PetscBool row_identity, col_identity, both_identity; 19083287d42SBarry Smith IS isicol; 191c7272abeSHong Zhang const PetscInt *r, *ic; 192c7272abeSHong Zhang PetscInt i, *ai = a->i, *aj = a->j; 193c7272abeSHong Zhang PetscInt *bi, *bj, *ajtmp; 194c7272abeSHong Zhang PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 195c7272abeSHong Zhang PetscReal f; 196c7272abeSHong Zhang PetscInt nlnk, *lnk, k, **bi_ptr; 1970298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 198c7272abeSHong Zhang PetscBT lnkbt; 199ece542f9SHong Zhang PetscBool missing; 20083287d42SBarry Smith 20183287d42SBarry Smith PetscFunctionBegin; 20294bad497SJacob Faibussowitsch PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 2039566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 20494bad497SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 205ece542f9SHong Zhang 2066ba06ab7SHong Zhang if (bs > 1) { /* check shifttype */ 20713bcc0bdSJacob Faibussowitsch PetscCheck(info->shifttype != (PetscReal)MAT_SHIFT_NONZERO && info->shifttype != (PetscReal)MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 2086ba06ab7SHong Zhang } 2096ba06ab7SHong Zhang 2109566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 2119566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 2129566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 21383287d42SBarry Smith 214bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 2159566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 2169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 217a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 218b2b2dd24SShri Abhyankar 219b2b2dd24SShri Abhyankar /* linked list for storing column indices of the active row */ 220b2b2dd24SShri Abhyankar nlnk = n + 1; 2219566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 222b2b2dd24SShri Abhyankar 2239566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 224b2b2dd24SShri Abhyankar 225b2b2dd24SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 226b2b2dd24SShri Abhyankar f = info->fill; 2279566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 22826fbe8dcSKarl Rupp 229b2b2dd24SShri Abhyankar current_space = free_space; 230b2b2dd24SShri Abhyankar 231b2b2dd24SShri Abhyankar for (i = 0; i < n; i++) { 232b2b2dd24SShri Abhyankar /* copy previous fill into linked list */ 233b2b2dd24SShri Abhyankar nzi = 0; 234b2b2dd24SShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 235b2b2dd24SShri Abhyankar ajtmp = aj + ai[r[i]]; 2369566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 237b2b2dd24SShri Abhyankar nzi += nlnk; 238b2b2dd24SShri Abhyankar 239b2b2dd24SShri Abhyankar /* add pivot rows into linked list */ 240b2b2dd24SShri Abhyankar row = lnk[n]; 241b2b2dd24SShri Abhyankar while (row < i) { 242b2b2dd24SShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 243b2b2dd24SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 2449566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 245b2b2dd24SShri Abhyankar nzi += nlnk; 246b2b2dd24SShri Abhyankar row = lnk[row]; 247b2b2dd24SShri Abhyankar } 248b2b2dd24SShri Abhyankar bi[i + 1] = bi[i] + nzi; 249b2b2dd24SShri Abhyankar im[i] = nzi; 250b2b2dd24SShri Abhyankar 251b2b2dd24SShri Abhyankar /* mark bdiag */ 252b2b2dd24SShri Abhyankar nzbd = 0; 253b2b2dd24SShri Abhyankar nnz = nzi; 254b2b2dd24SShri Abhyankar k = lnk[n]; 255b2b2dd24SShri Abhyankar while (nnz-- && k < i) { 256b2b2dd24SShri Abhyankar nzbd++; 257b2b2dd24SShri Abhyankar k = lnk[k]; 258b2b2dd24SShri Abhyankar } 2592ce24eb6SHong Zhang bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 260b2b2dd24SShri Abhyankar 261b2b2dd24SShri Abhyankar /* if free space is not available, make more free space */ 262b2b2dd24SShri Abhyankar if (current_space->local_remaining < nzi) { 263f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */ 2649566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 265b2b2dd24SShri Abhyankar reallocs++; 266b2b2dd24SShri Abhyankar } 267b2b2dd24SShri Abhyankar 268b2b2dd24SShri Abhyankar /* copy data into free space, then initialize lnk */ 2699566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 27026fbe8dcSKarl Rupp 271b2b2dd24SShri Abhyankar bi_ptr[i] = current_space->array; 272b2b2dd24SShri Abhyankar current_space->array += nzi; 273b2b2dd24SShri Abhyankar current_space->local_used += nzi; 274b2b2dd24SShri Abhyankar current_space->local_remaining -= nzi; 275b2b2dd24SShri Abhyankar } 276b2b2dd24SShri Abhyankar 2779566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 2789566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 279b2b2dd24SShri Abhyankar 2809263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 2819566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 2829566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 2839566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 2849566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 285b2b2dd24SShri Abhyankar 286b2b2dd24SShri Abhyankar /* put together the new matrix */ 2879566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 288*57508eceSPierre Jolivet b = (Mat_SeqBAIJ *)B->data; 28926fbe8dcSKarl Rupp 290b2b2dd24SShri Abhyankar b->free_ij = PETSC_TRUE; 2919f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray((bdiag[0] + 1) * bs2, sizeof(PetscScalar), (void **)&b->a)); 2929f0612e4SBarry Smith b->free_a = PETSC_TRUE; 293b2b2dd24SShri Abhyankar b->j = bj; 294b2b2dd24SShri Abhyankar b->i = bi; 295b2b2dd24SShri Abhyankar b->diag = bdiag; 296b2b2dd24SShri Abhyankar b->free_diag = PETSC_TRUE; 297f4259b30SLisandro Dalcin b->ilen = NULL; 298f4259b30SLisandro Dalcin b->imax = NULL; 299b2b2dd24SShri Abhyankar b->row = isrow; 300b2b2dd24SShri Abhyankar b->col = iscol; 301b2b2dd24SShri Abhyankar b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 30226fbe8dcSKarl Rupp 3039566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 3049566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 305b2b2dd24SShri Abhyankar b->icol = isicol; 3069566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 307b2b2dd24SShri Abhyankar 308b2b2dd24SShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 30926fbe8dcSKarl Rupp 310d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 311b2b2dd24SShri Abhyankar B->info.factor_mallocs = reallocs; 312b2b2dd24SShri Abhyankar B->info.fill_ratio_given = f; 313b2b2dd24SShri Abhyankar 314b2b2dd24SShri Abhyankar if (ai[n] != 0) { 315b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 316b2b2dd24SShri Abhyankar } else { 317b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = 0.0; 318b2b2dd24SShri Abhyankar } 3199263d837SHong Zhang #if defined(PETSC_USE_INFO) 3209263d837SHong Zhang if (ai[n] != 0) { 3219263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 3229566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 3239566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3249566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 3259566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 3269263d837SHong Zhang } else { 3279566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 3289263d837SHong Zhang } 3299263d837SHong Zhang #endif 330b2b2dd24SShri Abhyankar 3319566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 3329566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 33326fbe8dcSKarl Rupp 334ace3abfcSBarry Smith both_identity = (PetscBool)(row_identity && col_identity); 33526fbe8dcSKarl Rupp 3369566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity)); 3373ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 338b2b2dd24SShri Abhyankar } 339b2b2dd24SShri Abhyankar 340ff6a9541SJacob Faibussowitsch #if 0 341ff6a9541SJacob Faibussowitsch // unused 342ff6a9541SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 343d71ae5a4SJacob Faibussowitsch { 344faca2338SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 345faca2338SShri Abhyankar PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 346ace3abfcSBarry Smith PetscBool row_identity, col_identity, both_identity; 347faca2338SShri Abhyankar IS isicol; 348faca2338SShri Abhyankar const PetscInt *r, *ic; 349faca2338SShri Abhyankar PetscInt i, *ai = a->i, *aj = a->j; 350faca2338SShri Abhyankar PetscInt *bi, *bj, *ajtmp; 351faca2338SShri Abhyankar PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 352faca2338SShri Abhyankar PetscReal f; 353faca2338SShri Abhyankar PetscInt nlnk, *lnk, k, **bi_ptr; 3540298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 355faca2338SShri Abhyankar PetscBT lnkbt; 356ece542f9SHong Zhang PetscBool missing; 357faca2338SShri Abhyankar 358faca2338SShri Abhyankar PetscFunctionBegin; 35994bad497SJacob Faibussowitsch PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 3609566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 36194bad497SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 362ece542f9SHong Zhang 3639566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3649566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 3659566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 366faca2338SShri Abhyankar 367bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 3689566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 3699566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 370bc74c81fSJed Brown 371a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 372c7272abeSHong Zhang 373c7272abeSHong Zhang /* linked list for storing column indices of the active row */ 374c7272abeSHong Zhang nlnk = n + 1; 3759566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 376c7272abeSHong Zhang 3779566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 378c7272abeSHong Zhang 379c7272abeSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 380c7272abeSHong Zhang f = info->fill; 3819566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 382c7272abeSHong Zhang current_space = free_space; 38383287d42SBarry Smith 38483287d42SBarry Smith for (i = 0; i < n; i++) { 385c7272abeSHong Zhang /* copy previous fill into linked list */ 38683287d42SBarry Smith nzi = 0; 387c7272abeSHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 388c7272abeSHong Zhang ajtmp = aj + ai[r[i]]; 3899566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 390c7272abeSHong Zhang nzi += nlnk; 391c7272abeSHong Zhang 392c7272abeSHong Zhang /* add pivot rows into linked list */ 393c7272abeSHong Zhang row = lnk[n]; 394c7272abeSHong Zhang while (row < i) { 395c7272abeSHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 396c7272abeSHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 3979566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 398c7272abeSHong Zhang nzi += nlnk; 399c7272abeSHong Zhang row = lnk[row]; 40083287d42SBarry Smith } 401c7272abeSHong Zhang bi[i + 1] = bi[i] + nzi; 402c7272abeSHong Zhang im[i] = nzi; 403c7272abeSHong Zhang 404c7272abeSHong Zhang /* mark bdiag */ 405c7272abeSHong Zhang nzbd = 0; 406c7272abeSHong Zhang nnz = nzi; 407c7272abeSHong Zhang k = lnk[n]; 408c7272abeSHong Zhang while (nnz-- && k < i) { 409c7272abeSHong Zhang nzbd++; 410c7272abeSHong Zhang k = lnk[k]; 411c7272abeSHong Zhang } 412c7272abeSHong Zhang bdiag[i] = bi[i] + nzbd; 413c7272abeSHong Zhang 414c7272abeSHong Zhang /* if free space is not available, make more free space */ 415c7272abeSHong Zhang if (current_space->local_remaining < nzi) { 416f91af8c7SBarry Smith nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 4179566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 418c7272abeSHong Zhang reallocs++; 41983287d42SBarry Smith } 42083287d42SBarry Smith 421c7272abeSHong Zhang /* copy data into free space, then initialize lnk */ 4229566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 42326fbe8dcSKarl Rupp 424c7272abeSHong Zhang bi_ptr[i] = current_space->array; 425c7272abeSHong Zhang current_space->array += nzi; 426c7272abeSHong Zhang current_space->local_used += nzi; 427c7272abeSHong Zhang current_space->local_remaining -= nzi; 428c7272abeSHong Zhang } 4296cf91177SBarry Smith #if defined(PETSC_USE_INFO) 43083287d42SBarry Smith if (ai[n] != 0) { 431c7272abeSHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 4329566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 4339566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 4349566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 4359566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 43683287d42SBarry Smith } else { 4379566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 43883287d42SBarry Smith } 43963ba0a88SBarry Smith #endif 44083287d42SBarry Smith 4419566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 4429566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 44383287d42SBarry Smith 444c7272abeSHong Zhang /* destroy list of free space and other temporary array(s) */ 4459566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 4469566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4479566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 4489566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 44983287d42SBarry Smith 45083287d42SBarry Smith /* put together the new matrix */ 4519566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 452*57508eceSPierre Jolivet b = (Mat_SeqBAIJ *)B->data; 453e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 4549f0612e4SBarry Smith PetscCall(PetscShmgetAllocateArray((bi[n] + 1) * bs2,,sizeof(PetscScalar),(void **)&b->a)); 4559f0612e4SBarry Smith b->free_a = PETSC_TRUE; 456c7272abeSHong Zhang b->j = bj; 457c7272abeSHong Zhang b->i = bi; 458c7272abeSHong Zhang b->diag = bdiag; 4597f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 460f4259b30SLisandro Dalcin b->ilen = NULL; 461f4259b30SLisandro Dalcin b->imax = NULL; 46283287d42SBarry Smith b->row = isrow; 46383287d42SBarry Smith b->col = iscol; 464d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 46526fbe8dcSKarl Rupp 4669566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 4679566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 46883287d42SBarry Smith b->icol = isicol; 46926fbe8dcSKarl Rupp 4709566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 47183287d42SBarry Smith 472c7272abeSHong Zhang b->maxnz = b->nz = bi[n]; 47326fbe8dcSKarl Rupp 474*57508eceSPierre Jolivet B->factortype = MAT_FACTOR_LU; 475*57508eceSPierre Jolivet B->info.factor_mallocs = reallocs; 476*57508eceSPierre Jolivet B->info.fill_ratio_given = f; 477c7272abeSHong Zhang 47883287d42SBarry Smith if (ai[n] != 0) { 479*57508eceSPierre Jolivet B->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 48083287d42SBarry Smith } else { 481*57508eceSPierre Jolivet B->info.fill_ratio_needed = 0.0; 48283287d42SBarry Smith } 483c7272abeSHong Zhang 4849566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 4859566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 48626fbe8dcSKarl Rupp 487ace3abfcSBarry Smith both_identity = (PetscBool)(row_identity && col_identity); 48826fbe8dcSKarl Rupp 4899566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity)); 4903ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 49183287d42SBarry Smith } 492ff6a9541SJacob Faibussowitsch #endif 493