1be1d678aSKris Buschelman 283287d42SBarry Smith /* 383287d42SBarry Smith Factorization code for BAIJ format. 483287d42SBarry Smith */ 5c6db04a5SJed Brown #include <../src/mat/impls/baij/seq/baij.h> 6af0996ceSBarry Smith #include <petsc/private/kernels/blockinvert.h> 783287d42SBarry Smith 8db4efbfdSBarry Smith /* 9db4efbfdSBarry Smith This is used to set the numeric factorization for both LU and ILU symbolic factorization 10db4efbfdSBarry Smith */ 11d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact, PetscBool natural) 12d71ae5a4SJacob Faibussowitsch { 13ae3d28f0SHong Zhang PetscFunctionBegin; 146929473cSShri Abhyankar if (natural) { 156929473cSShri Abhyankar switch (fact->rmap->bs) { 16d71ae5a4SJacob Faibussowitsch case 1: 17d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 18d71ae5a4SJacob Faibussowitsch break; 19d71ae5a4SJacob Faibussowitsch case 2: 20d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering; 21d71ae5a4SJacob Faibussowitsch break; 22d71ae5a4SJacob Faibussowitsch case 3: 23d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering; 24d71ae5a4SJacob Faibussowitsch break; 25d71ae5a4SJacob Faibussowitsch case 4: 26d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering; 27d71ae5a4SJacob Faibussowitsch break; 28d71ae5a4SJacob Faibussowitsch case 5: 29d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering; 30d71ae5a4SJacob Faibussowitsch break; 31d71ae5a4SJacob Faibussowitsch case 6: 32d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering; 33d71ae5a4SJacob Faibussowitsch break; 34d71ae5a4SJacob Faibussowitsch case 7: 35d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering; 36d71ae5a4SJacob Faibussowitsch break; 3796e086a2SDaniel Kokron case 9: 385f70456aSHong Zhang #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES) 3996e086a2SDaniel Kokron fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering; 40aee5c371SBarry Smith #else 41aee5c371SBarry Smith fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 42aee5c371SBarry Smith #endif 4396e086a2SDaniel Kokron break; 44d71ae5a4SJacob Faibussowitsch case 15: 45d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering; 46d71ae5a4SJacob Faibussowitsch break; 47d71ae5a4SJacob Faibussowitsch default: 48d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 49d71ae5a4SJacob Faibussowitsch break; 506929473cSShri Abhyankar } 516ba06ab7SHong Zhang } else { 52ae3d28f0SHong Zhang switch (fact->rmap->bs) { 53d71ae5a4SJacob Faibussowitsch case 1: 54d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1; 55d71ae5a4SJacob Faibussowitsch break; 56d71ae5a4SJacob Faibussowitsch case 2: 57d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2; 58d71ae5a4SJacob Faibussowitsch break; 59d71ae5a4SJacob Faibussowitsch case 3: 60d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3; 61d71ae5a4SJacob Faibussowitsch break; 62d71ae5a4SJacob Faibussowitsch case 4: 63d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4; 64d71ae5a4SJacob Faibussowitsch break; 65d71ae5a4SJacob Faibussowitsch case 5: 66d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5; 67d71ae5a4SJacob Faibussowitsch break; 68d71ae5a4SJacob Faibussowitsch case 6: 69d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6; 70d71ae5a4SJacob Faibussowitsch break; 71d71ae5a4SJacob Faibussowitsch case 7: 72d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7; 73d71ae5a4SJacob Faibussowitsch break; 74d71ae5a4SJacob Faibussowitsch default: 75d71ae5a4SJacob Faibussowitsch fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N; 76d71ae5a4SJacob Faibussowitsch break; 77ae3d28f0SHong Zhang } 786929473cSShri Abhyankar } 793ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 80ae3d28f0SHong Zhang } 81ae3d28f0SHong Zhang 82d71ae5a4SJacob Faibussowitsch PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA, PetscBool natural) 83d71ae5a4SJacob Faibussowitsch { 84db4efbfdSBarry Smith PetscFunctionBegin; 85db4efbfdSBarry Smith if (natural) { 86db4efbfdSBarry Smith switch (inA->rmap->bs) { 87d71ae5a4SJacob Faibussowitsch case 1: 88d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 89d71ae5a4SJacob Faibussowitsch break; 90d71ae5a4SJacob Faibussowitsch case 2: 91d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace; 92d71ae5a4SJacob Faibussowitsch break; 93d71ae5a4SJacob Faibussowitsch case 3: 94d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace; 95d71ae5a4SJacob Faibussowitsch break; 96db4efbfdSBarry Smith case 4: 97ce63c4c1SBarry Smith #if defined(PETSC_USE_REAL_MAT_SINGLE) 98db4efbfdSBarry Smith { 99ace3abfcSBarry Smith PetscBool sse_enabled_local; 1009566063dSJacob Faibussowitsch PetscCall(PetscSSEIsEnabled(inA->comm, &sse_enabled_local, NULL)); 101db4efbfdSBarry Smith if (sse_enabled_local) { 102db4efbfdSBarry Smith #if defined(PETSC_HAVE_SSE) 103db4efbfdSBarry Smith int i, *AJ = a->j, nz = a->nz, n = a->mbs; 104db4efbfdSBarry Smith if (n == (unsigned short)n) { 105db4efbfdSBarry Smith unsigned short *aj = (unsigned short *)AJ; 10626fbe8dcSKarl Rupp for (i = 0; i < nz; i++) aj[i] = (unsigned short)AJ[i]; 10726fbe8dcSKarl Rupp 108db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj; 109db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj; 11026fbe8dcSKarl Rupp 1119566063dSJacob Faibussowitsch PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, ushort j index factor BS=4\n")); 112db4efbfdSBarry Smith } else { 113db4efbfdSBarry Smith /* Scale the column indices for easier indexing in MatSolve. */ 114db4efbfdSBarry Smith /* for (i=0;i<nz;i++) { */ 115db4efbfdSBarry Smith /* AJ[i] = AJ[i]*4; */ 116db4efbfdSBarry Smith /* } */ 117db4efbfdSBarry Smith inA->ops->setunfactored = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE; 118db4efbfdSBarry Smith inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE; 11926fbe8dcSKarl Rupp 1209566063dSJacob Faibussowitsch PetscCall(PetscInfo(inA, "Using special SSE, in-place natural ordering, int j index factor BS=4\n")); 121db4efbfdSBarry Smith } 122db4efbfdSBarry Smith #else 123db4efbfdSBarry Smith /* This should never be reached. If so, problem in PetscSSEIsEnabled. */ 124e32f2f54SBarry Smith SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP, "SSE Hardware unavailable"); 125db4efbfdSBarry Smith #endif 126db4efbfdSBarry Smith } else { 12706e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 128db4efbfdSBarry Smith } 129db4efbfdSBarry Smith } 130db4efbfdSBarry Smith #else 13106e38f1dSHong Zhang inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace; 132db4efbfdSBarry Smith #endif 133db4efbfdSBarry Smith break; 134d71ae5a4SJacob Faibussowitsch case 5: 135d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace; 136d71ae5a4SJacob Faibussowitsch break; 137d71ae5a4SJacob Faibussowitsch case 6: 138d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace; 139d71ae5a4SJacob Faibussowitsch break; 140d71ae5a4SJacob Faibussowitsch case 7: 141d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace; 142d71ae5a4SJacob Faibussowitsch break; 143d71ae5a4SJacob Faibussowitsch default: 144d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 145d71ae5a4SJacob Faibussowitsch break; 146db4efbfdSBarry Smith } 147db4efbfdSBarry Smith } else { 148db4efbfdSBarry Smith switch (inA->rmap->bs) { 149d71ae5a4SJacob Faibussowitsch case 1: 150d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace; 151d71ae5a4SJacob Faibussowitsch break; 152d71ae5a4SJacob Faibussowitsch case 2: 153d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace; 154d71ae5a4SJacob Faibussowitsch break; 155d71ae5a4SJacob Faibussowitsch case 3: 156d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace; 157d71ae5a4SJacob Faibussowitsch break; 158d71ae5a4SJacob Faibussowitsch case 4: 159d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace; 160d71ae5a4SJacob Faibussowitsch break; 161d71ae5a4SJacob Faibussowitsch case 5: 162d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace; 163d71ae5a4SJacob Faibussowitsch break; 164d71ae5a4SJacob Faibussowitsch case 6: 165d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace; 166d71ae5a4SJacob Faibussowitsch break; 167d71ae5a4SJacob Faibussowitsch case 7: 168d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace; 169d71ae5a4SJacob Faibussowitsch break; 170d71ae5a4SJacob Faibussowitsch default: 171d71ae5a4SJacob Faibussowitsch inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace; 172d71ae5a4SJacob Faibussowitsch break; 173db4efbfdSBarry Smith } 174db4efbfdSBarry Smith } 1753ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 176db4efbfdSBarry Smith } 177db4efbfdSBarry Smith 17883287d42SBarry Smith /* 17983287d42SBarry Smith The symbolic factorization code is identical to that for AIJ format, 18083287d42SBarry Smith except for very small changes since this is now a SeqBAIJ datastructure. 18183287d42SBarry Smith NOT good code reuse. 18283287d42SBarry Smith */ 183c6db04a5SJed Brown #include <petscbt.h> 184c6db04a5SJed Brown #include <../src/mat/utils/freespace.h> 185c7272abeSHong Zhang 186d71ae5a4SJacob Faibussowitsch PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 187d71ae5a4SJacob Faibussowitsch { 18883287d42SBarry Smith Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 189c7272abeSHong Zhang PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 190ace3abfcSBarry Smith PetscBool row_identity, col_identity, both_identity; 19183287d42SBarry Smith IS isicol; 192c7272abeSHong Zhang const PetscInt *r, *ic; 193c7272abeSHong Zhang PetscInt i, *ai = a->i, *aj = a->j; 194c7272abeSHong Zhang PetscInt *bi, *bj, *ajtmp; 195c7272abeSHong Zhang PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 196c7272abeSHong Zhang PetscReal f; 197c7272abeSHong Zhang PetscInt nlnk, *lnk, k, **bi_ptr; 1980298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 199c7272abeSHong Zhang PetscBT lnkbt; 200ece542f9SHong Zhang PetscBool missing; 20183287d42SBarry Smith 20283287d42SBarry Smith PetscFunctionBegin; 20394bad497SJacob Faibussowitsch PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 2049566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 20594bad497SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 206ece542f9SHong Zhang 2076ba06ab7SHong Zhang if (bs > 1) { /* check shifttype */ 208aed4548fSBarry Smith PetscCheck(info->shifttype != MAT_SHIFT_NONZERO && info->shifttype != MAT_SHIFT_POSITIVE_DEFINITE, PETSC_COMM_SELF, PETSC_ERR_SUP, "Only MAT_SHIFT_NONE and MAT_SHIFT_INBLOCKS are supported for BAIJ matrix"); 2096ba06ab7SHong Zhang } 2106ba06ab7SHong Zhang 2119566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 2129566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 2139566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 21483287d42SBarry Smith 215bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 2169566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 2179566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 218a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 219b2b2dd24SShri Abhyankar 220b2b2dd24SShri Abhyankar /* linked list for storing column indices of the active row */ 221b2b2dd24SShri Abhyankar nlnk = n + 1; 2229566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 223b2b2dd24SShri Abhyankar 2249566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 225b2b2dd24SShri Abhyankar 226b2b2dd24SShri Abhyankar /* initial FreeSpace size is f*(ai[n]+1) */ 227b2b2dd24SShri Abhyankar f = info->fill; 2289566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 22926fbe8dcSKarl Rupp 230b2b2dd24SShri Abhyankar current_space = free_space; 231b2b2dd24SShri Abhyankar 232b2b2dd24SShri Abhyankar for (i = 0; i < n; i++) { 233b2b2dd24SShri Abhyankar /* copy previous fill into linked list */ 234b2b2dd24SShri Abhyankar nzi = 0; 235b2b2dd24SShri Abhyankar nnz = ai[r[i] + 1] - ai[r[i]]; 236b2b2dd24SShri Abhyankar ajtmp = aj + ai[r[i]]; 2379566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 238b2b2dd24SShri Abhyankar nzi += nlnk; 239b2b2dd24SShri Abhyankar 240b2b2dd24SShri Abhyankar /* add pivot rows into linked list */ 241b2b2dd24SShri Abhyankar row = lnk[n]; 242b2b2dd24SShri Abhyankar while (row < i) { 243b2b2dd24SShri Abhyankar nzbd = bdiag[row] + 1; /* num of entries in the row with column index <= row */ 244b2b2dd24SShri Abhyankar ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 2459566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 246b2b2dd24SShri Abhyankar nzi += nlnk; 247b2b2dd24SShri Abhyankar row = lnk[row]; 248b2b2dd24SShri Abhyankar } 249b2b2dd24SShri Abhyankar bi[i + 1] = bi[i] + nzi; 250b2b2dd24SShri Abhyankar im[i] = nzi; 251b2b2dd24SShri Abhyankar 252b2b2dd24SShri Abhyankar /* mark bdiag */ 253b2b2dd24SShri Abhyankar nzbd = 0; 254b2b2dd24SShri Abhyankar nnz = nzi; 255b2b2dd24SShri Abhyankar k = lnk[n]; 256b2b2dd24SShri Abhyankar while (nnz-- && k < i) { 257b2b2dd24SShri Abhyankar nzbd++; 258b2b2dd24SShri Abhyankar k = lnk[k]; 259b2b2dd24SShri Abhyankar } 2602ce24eb6SHong Zhang bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */ 261b2b2dd24SShri Abhyankar 262b2b2dd24SShri Abhyankar /* if free space is not available, make more free space */ 263b2b2dd24SShri Abhyankar if (current_space->local_remaining < nzi) { 264f91af8c7SBarry Smith nnz = PetscIntMultTruncate(2, PetscIntMultTruncate(n - i, nzi)); /* estimated and max additional space needed */ 2659566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 266b2b2dd24SShri Abhyankar reallocs++; 267b2b2dd24SShri Abhyankar } 268b2b2dd24SShri Abhyankar 269b2b2dd24SShri Abhyankar /* copy data into free space, then initialize lnk */ 2709566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 27126fbe8dcSKarl Rupp 272b2b2dd24SShri Abhyankar bi_ptr[i] = current_space->array; 273b2b2dd24SShri Abhyankar current_space->array += nzi; 274b2b2dd24SShri Abhyankar current_space->local_used += nzi; 275b2b2dd24SShri Abhyankar current_space->local_remaining -= nzi; 276b2b2dd24SShri Abhyankar } 277b2b2dd24SShri Abhyankar 2789566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 2799566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 280b2b2dd24SShri Abhyankar 2819263d837SHong Zhang /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */ 2829566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 2839566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous_LU(&free_space, bj, n, bi, bdiag)); 2849566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 2859566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 286b2b2dd24SShri Abhyankar 287b2b2dd24SShri Abhyankar /* put together the new matrix */ 2889566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 289b2b2dd24SShri Abhyankar b = (Mat_SeqBAIJ *)(B)->data; 29026fbe8dcSKarl Rupp 291b2b2dd24SShri Abhyankar b->free_a = PETSC_TRUE; 292b2b2dd24SShri Abhyankar b->free_ij = PETSC_TRUE; 293b2b2dd24SShri Abhyankar b->singlemalloc = PETSC_FALSE; 29426fbe8dcSKarl Rupp 2959566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((bdiag[0] + 1) * bs2, &b->a)); 296b2b2dd24SShri Abhyankar b->j = bj; 297b2b2dd24SShri Abhyankar b->i = bi; 298b2b2dd24SShri Abhyankar b->diag = bdiag; 299b2b2dd24SShri Abhyankar b->free_diag = PETSC_TRUE; 300f4259b30SLisandro Dalcin b->ilen = NULL; 301f4259b30SLisandro Dalcin b->imax = NULL; 302b2b2dd24SShri Abhyankar b->row = isrow; 303b2b2dd24SShri Abhyankar b->col = iscol; 304b2b2dd24SShri Abhyankar b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 30526fbe8dcSKarl Rupp 3069566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 3079566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 308b2b2dd24SShri Abhyankar b->icol = isicol; 3099566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 310b2b2dd24SShri Abhyankar 311b2b2dd24SShri Abhyankar b->maxnz = b->nz = bdiag[0] + 1; 31226fbe8dcSKarl Rupp 313d5f3da31SBarry Smith B->factortype = MAT_FACTOR_LU; 314b2b2dd24SShri Abhyankar B->info.factor_mallocs = reallocs; 315b2b2dd24SShri Abhyankar B->info.fill_ratio_given = f; 316b2b2dd24SShri Abhyankar 317b2b2dd24SShri Abhyankar if (ai[n] != 0) { 318b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = ((PetscReal)(bdiag[0] + 1)) / ((PetscReal)ai[n]); 319b2b2dd24SShri Abhyankar } else { 320b2b2dd24SShri Abhyankar B->info.fill_ratio_needed = 0.0; 321b2b2dd24SShri Abhyankar } 3229263d837SHong Zhang #if defined(PETSC_USE_INFO) 3239263d837SHong Zhang if (ai[n] != 0) { 3249263d837SHong Zhang PetscReal af = B->info.fill_ratio_needed; 3259566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 3269566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 3279566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 3289566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 3299263d837SHong Zhang } else { 3309566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 3319263d837SHong Zhang } 3329263d837SHong Zhang #endif 333b2b2dd24SShri Abhyankar 3349566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 3359566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 33626fbe8dcSKarl Rupp 337ace3abfcSBarry Smith both_identity = (PetscBool)(row_identity && col_identity); 33826fbe8dcSKarl Rupp 3399566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization(B, both_identity)); 3403ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 341b2b2dd24SShri Abhyankar } 342b2b2dd24SShri Abhyankar 343*ff6a9541SJacob Faibussowitsch #if 0 344*ff6a9541SJacob Faibussowitsch // unused 345*ff6a9541SJacob Faibussowitsch static PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B, Mat A, IS isrow, IS iscol, const MatFactorInfo *info) 346d71ae5a4SJacob Faibussowitsch { 347faca2338SShri Abhyankar Mat_SeqBAIJ *a = (Mat_SeqBAIJ *)A->data, *b; 348faca2338SShri Abhyankar PetscInt n = a->mbs, bs = A->rmap->bs, bs2 = a->bs2; 349ace3abfcSBarry Smith PetscBool row_identity, col_identity, both_identity; 350faca2338SShri Abhyankar IS isicol; 351faca2338SShri Abhyankar const PetscInt *r, *ic; 352faca2338SShri Abhyankar PetscInt i, *ai = a->i, *aj = a->j; 353faca2338SShri Abhyankar PetscInt *bi, *bj, *ajtmp; 354faca2338SShri Abhyankar PetscInt *bdiag, row, nnz, nzi, reallocs = 0, nzbd, *im; 355faca2338SShri Abhyankar PetscReal f; 356faca2338SShri Abhyankar PetscInt nlnk, *lnk, k, **bi_ptr; 3570298fd71SBarry Smith PetscFreeSpaceList free_space = NULL, current_space = NULL; 358faca2338SShri Abhyankar PetscBT lnkbt; 359ece542f9SHong Zhang PetscBool missing; 360faca2338SShri Abhyankar 361faca2338SShri Abhyankar PetscFunctionBegin; 36294bad497SJacob Faibussowitsch PetscCheck(A->rmap->N == A->cmap->N, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONG, "matrix must be square"); 3639566063dSJacob Faibussowitsch PetscCall(MatMissingDiagonal(A, &missing, &i)); 36494bad497SJacob Faibussowitsch PetscCheck(!missing, PETSC_COMM_SELF, PETSC_ERR_ARG_WRONGSTATE, "Matrix is missing diagonal entry %" PetscInt_FMT, i); 365ece542f9SHong Zhang 3669566063dSJacob Faibussowitsch PetscCall(ISInvertPermutation(iscol, PETSC_DECIDE, &isicol)); 3679566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isrow, &r)); 3689566063dSJacob Faibussowitsch PetscCall(ISGetIndices(isicol, &ic)); 369faca2338SShri Abhyankar 370bc74c81fSJed Brown /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */ 3719566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bi)); 3729566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n + 1, &bdiag)); 373bc74c81fSJed Brown 374a6df321fSShri Abhyankar bi[0] = bdiag[0] = 0; 375c7272abeSHong Zhang 376c7272abeSHong Zhang /* linked list for storing column indices of the active row */ 377c7272abeSHong Zhang nlnk = n + 1; 3789566063dSJacob Faibussowitsch PetscCall(PetscLLCreate(n, n, nlnk, lnk, lnkbt)); 379c7272abeSHong Zhang 3809566063dSJacob Faibussowitsch PetscCall(PetscMalloc2(n + 1, &bi_ptr, n + 1, &im)); 381c7272abeSHong Zhang 382c7272abeSHong Zhang /* initial FreeSpace size is f*(ai[n]+1) */ 383c7272abeSHong Zhang f = info->fill; 3849566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(PetscRealIntMultTruncate(f, ai[n] + 1), &free_space)); 385c7272abeSHong Zhang current_space = free_space; 38683287d42SBarry Smith 38783287d42SBarry Smith for (i = 0; i < n; i++) { 388c7272abeSHong Zhang /* copy previous fill into linked list */ 38983287d42SBarry Smith nzi = 0; 390c7272abeSHong Zhang nnz = ai[r[i] + 1] - ai[r[i]]; 391c7272abeSHong Zhang ajtmp = aj + ai[r[i]]; 3929566063dSJacob Faibussowitsch PetscCall(PetscLLAddPerm(nnz, ajtmp, ic, n, &nlnk, lnk, lnkbt)); 393c7272abeSHong Zhang nzi += nlnk; 394c7272abeSHong Zhang 395c7272abeSHong Zhang /* add pivot rows into linked list */ 396c7272abeSHong Zhang row = lnk[n]; 397c7272abeSHong Zhang while (row < i) { 398c7272abeSHong Zhang nzbd = bdiag[row] - bi[row] + 1; /* num of entries in the row with column index <= row */ 399c7272abeSHong Zhang ajtmp = bi_ptr[row] + nzbd; /* points to the entry next to the diagonal */ 4009566063dSJacob Faibussowitsch PetscCall(PetscLLAddSortedLU(ajtmp, row, &nlnk, lnk, lnkbt, i, nzbd, im)); 401c7272abeSHong Zhang nzi += nlnk; 402c7272abeSHong Zhang row = lnk[row]; 40383287d42SBarry Smith } 404c7272abeSHong Zhang bi[i + 1] = bi[i] + nzi; 405c7272abeSHong Zhang im[i] = nzi; 406c7272abeSHong Zhang 407c7272abeSHong Zhang /* mark bdiag */ 408c7272abeSHong Zhang nzbd = 0; 409c7272abeSHong Zhang nnz = nzi; 410c7272abeSHong Zhang k = lnk[n]; 411c7272abeSHong Zhang while (nnz-- && k < i) { 412c7272abeSHong Zhang nzbd++; 413c7272abeSHong Zhang k = lnk[k]; 414c7272abeSHong Zhang } 415c7272abeSHong Zhang bdiag[i] = bi[i] + nzbd; 416c7272abeSHong Zhang 417c7272abeSHong Zhang /* if free space is not available, make more free space */ 418c7272abeSHong Zhang if (current_space->local_remaining < nzi) { 419f91af8c7SBarry Smith nnz = PetscIntMultTruncate(n - i, nzi); /* estimated and max additional space needed */ 4209566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceGet(nnz, ¤t_space)); 421c7272abeSHong Zhang reallocs++; 42283287d42SBarry Smith } 42383287d42SBarry Smith 424c7272abeSHong Zhang /* copy data into free space, then initialize lnk */ 4259566063dSJacob Faibussowitsch PetscCall(PetscLLClean(n, n, nzi, lnk, current_space->array, lnkbt)); 42626fbe8dcSKarl Rupp 427c7272abeSHong Zhang bi_ptr[i] = current_space->array; 428c7272abeSHong Zhang current_space->array += nzi; 429c7272abeSHong Zhang current_space->local_used += nzi; 430c7272abeSHong Zhang current_space->local_remaining -= nzi; 431c7272abeSHong Zhang } 4326cf91177SBarry Smith #if defined(PETSC_USE_INFO) 43383287d42SBarry Smith if (ai[n] != 0) { 434c7272abeSHong Zhang PetscReal af = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 4359566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n", reallocs, (double)f, (double)af)); 4369566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Run with -pc_factor_fill %g or use \n", (double)af)); 4379566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "PCFactorSetFill(pc,%g);\n", (double)af)); 4389566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "for best performance.\n")); 43983287d42SBarry Smith } else { 4409566063dSJacob Faibussowitsch PetscCall(PetscInfo(A, "Empty matrix\n")); 44183287d42SBarry Smith } 44263ba0a88SBarry Smith #endif 44383287d42SBarry Smith 4449566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isrow, &r)); 4459566063dSJacob Faibussowitsch PetscCall(ISRestoreIndices(isicol, &ic)); 44683287d42SBarry Smith 447c7272abeSHong Zhang /* destroy list of free space and other temporary array(s) */ 4489566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bi[n] + 1, &bj)); 4499566063dSJacob Faibussowitsch PetscCall(PetscFreeSpaceContiguous(&free_space, bj)); 4509566063dSJacob Faibussowitsch PetscCall(PetscLLDestroy(lnk, lnkbt)); 4519566063dSJacob Faibussowitsch PetscCall(PetscFree2(bi_ptr, im)); 45283287d42SBarry Smith 45383287d42SBarry Smith /* put together the new matrix */ 4549566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetPreallocation(B, bs, MAT_SKIP_ALLOCATION, NULL)); 455719d5645SBarry Smith b = (Mat_SeqBAIJ *)(B)->data; 45626fbe8dcSKarl Rupp 457e6b907acSBarry Smith b->free_a = PETSC_TRUE; 458e6b907acSBarry Smith b->free_ij = PETSC_TRUE; 459c7272abeSHong Zhang b->singlemalloc = PETSC_FALSE; 46026fbe8dcSKarl Rupp 4619566063dSJacob Faibussowitsch PetscCall(PetscMalloc1((bi[n] + 1) * bs2, &b->a)); 462c7272abeSHong Zhang b->j = bj; 463c7272abeSHong Zhang b->i = bi; 464c7272abeSHong Zhang b->diag = bdiag; 4657f53bb6cSHong Zhang b->free_diag = PETSC_TRUE; 466f4259b30SLisandro Dalcin b->ilen = NULL; 467f4259b30SLisandro Dalcin b->imax = NULL; 46883287d42SBarry Smith b->row = isrow; 46983287d42SBarry Smith b->col = iscol; 470d3d32019SBarry Smith b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE; 47126fbe8dcSKarl Rupp 4729566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)isrow)); 4739566063dSJacob Faibussowitsch PetscCall(PetscObjectReference((PetscObject)iscol)); 47483287d42SBarry Smith b->icol = isicol; 47526fbe8dcSKarl Rupp 4769566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(bs * n + bs, &b->solve_work)); 47783287d42SBarry Smith 478c7272abeSHong Zhang b->maxnz = b->nz = bi[n]; 47926fbe8dcSKarl Rupp 480d5f3da31SBarry Smith (B)->factortype = MAT_FACTOR_LU; 481719d5645SBarry Smith (B)->info.factor_mallocs = reallocs; 482719d5645SBarry Smith (B)->info.fill_ratio_given = f; 483c7272abeSHong Zhang 48483287d42SBarry Smith if (ai[n] != 0) { 485c7272abeSHong Zhang (B)->info.fill_ratio_needed = ((PetscReal)bi[n]) / ((PetscReal)ai[n]); 48683287d42SBarry Smith } else { 487719d5645SBarry Smith (B)->info.fill_ratio_needed = 0.0; 48883287d42SBarry Smith } 489c7272abeSHong Zhang 4909566063dSJacob Faibussowitsch PetscCall(ISIdentity(isrow, &row_identity)); 4919566063dSJacob Faibussowitsch PetscCall(ISIdentity(iscol, &col_identity)); 49226fbe8dcSKarl Rupp 493ace3abfcSBarry Smith both_identity = (PetscBool)(row_identity && col_identity); 49426fbe8dcSKarl Rupp 4959566063dSJacob Faibussowitsch PetscCall(MatSeqBAIJSetNumericFactorization_inplace(B, both_identity)); 4963ba16761SJacob Faibussowitsch PetscFunctionReturn(PETSC_SUCCESS); 49783287d42SBarry Smith } 498*ff6a9541SJacob Faibussowitsch #endif 499