1b2f1ab58SBarry Smith 2b2f1ab58SBarry Smith /* 3b2f1ab58SBarry Smith spbas_cholesky_row_alloc: 4b2f1ab58SBarry Smith in the data arrays, find a place where another row may be stored. 5b2f1ab58SBarry Smith Return PETSC_ERR_MEM if there is insufficient space to store the 6b2f1ab58SBarry Smith row, so garbage collection and/or re-allocation may be done. 7b2f1ab58SBarry Smith */ 8*9371c9d4SSatish Balay PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used) { 9b2f1ab58SBarry Smith PetscFunctionBegin; 10b2f1ab58SBarry Smith retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 11b2f1ab58SBarry Smith retval.values[k] = &retval.alloc_val[*n_alloc_used]; 12b2f1ab58SBarry Smith *n_alloc_used += r_nnz; 13cc442ecdSBarry Smith if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_FALSE); 14cc442ecdSBarry Smith PetscFunctionReturn(PETSC_TRUE); 15b2f1ab58SBarry Smith } 16b2f1ab58SBarry Smith 17b2f1ab58SBarry Smith /* 18b2f1ab58SBarry Smith spbas_cholesky_garbage_collect: 19b2f1ab58SBarry Smith move the rows which have been calculated so far, as well as 20b2f1ab58SBarry Smith those currently under construction, to the front of the 21b2f1ab58SBarry Smith array, while putting them in the proper order. 22b2f1ab58SBarry Smith When it seems necessary, enlarge the current arrays. 23b2f1ab58SBarry Smith 24b2f1ab58SBarry Smith NB: re-allocation is being simulated using 25b2f1ab58SBarry Smith PetscMalloc, memcpy, PetscFree, because 26b2f1ab58SBarry Smith PetscRealloc does not seem to exist. 27b2f1ab58SBarry Smith 28b2f1ab58SBarry Smith */ 29be332245SKarl Rupp PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 302205254eSKarl Rupp Only the storage, not the contents of this matrix is changed in this function */ 31be332245SKarl Rupp PetscInt i_row, /* I : Number of rows for which the final contents are known */ 32c328eeadSBarry Smith PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 33c328eeadSBarry Smith places in the arrays: they need not be moved any more */ 34c328eeadSBarry Smith PetscInt *n_alloc_used, /* I/O: */ 35be332245SKarl Rupp PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 36b2f1ab58SBarry Smith { 37c328eeadSBarry Smith /* PSEUDO-CODE: 38c328eeadSBarry Smith 1. Choose the appropriate size for the arrays 39c328eeadSBarry Smith 2. Rescue the arrays which would be lost during garbage collection 40c328eeadSBarry Smith 3. Reallocate and correct administration 41c328eeadSBarry Smith 4. Move all arrays so that they are in proper order */ 42b2f1ab58SBarry Smith 43b2f1ab58SBarry Smith PetscInt i, j; 44b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 45b2f1ab58SBarry Smith PetscInt n_alloc_ok = 0; 46b2f1ab58SBarry Smith PetscInt n_alloc_ok_max = 0; 47b2f1ab58SBarry Smith PetscInt need_already = 0; 48b2f1ab58SBarry Smith PetscInt max_need_extra = 0; 49b2f1ab58SBarry Smith PetscInt n_alloc_max, n_alloc_est, n_alloc; 50b2f1ab58SBarry Smith PetscInt n_alloc_now = result->n_alloc_icol; 51b2f1ab58SBarry Smith PetscInt *alloc_icol_old = result->alloc_icol; 52b2f1ab58SBarry Smith PetscScalar *alloc_val_old = result->alloc_val; 53b2f1ab58SBarry Smith PetscInt *icol_rescue; 54b2f1ab58SBarry Smith PetscScalar *val_rescue; 55b2f1ab58SBarry Smith PetscInt n_rescue; 56b2f1ab58SBarry Smith PetscInt i_here, i_last, n_copy; 57d36aa34eSBarry Smith const PetscReal xtra_perc = 20; 58b2f1ab58SBarry Smith 59b2f1ab58SBarry Smith PetscFunctionBegin; 60c328eeadSBarry Smith /********************************************************* 61c328eeadSBarry Smith 1. Choose appropriate array size 62c328eeadSBarry Smith Count number of rows and memory usage which is already final */ 639ccc4a9bSBarry Smith for (i = 0; i < i_row; i++) { 64b2f1ab58SBarry Smith n_alloc_ok += result->row_nnz[i]; 65b2f1ab58SBarry Smith n_alloc_ok_max += max_row_nnz[i]; 66b2f1ab58SBarry Smith } 67b2f1ab58SBarry Smith 68c328eeadSBarry Smith /* Count currently needed memory usage and future memory requirements 69c328eeadSBarry Smith (max, predicted)*/ 709ccc4a9bSBarry Smith for (i = i_row; i < nrows; i++) { 719ccc4a9bSBarry Smith if (!result->row_nnz[i]) { 72b2f1ab58SBarry Smith max_need_extra += max_row_nnz[i]; 739ccc4a9bSBarry Smith } else { 74b2f1ab58SBarry Smith need_already += max_row_nnz[i]; 75b2f1ab58SBarry Smith } 76b2f1ab58SBarry Smith } 77b2f1ab58SBarry Smith 78c328eeadSBarry Smith /* Make maximal and realistic memory requirement estimates */ 79b2f1ab58SBarry Smith n_alloc_max = n_alloc_ok + need_already + max_need_extra; 809ccc4a9bSBarry Smith n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max)); 81b2f1ab58SBarry Smith 82c328eeadSBarry Smith /* Choose array sizes */ 832205254eSKarl Rupp if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 842205254eSKarl Rupp else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 852205254eSKarl Rupp else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max; 862205254eSKarl Rupp else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0)); 87b2f1ab58SBarry Smith 88c328eeadSBarry Smith /* If new estimate is less than what we already have, 89c328eeadSBarry Smith don't reallocate, just garbage-collect */ 90*9371c9d4SSatish Balay if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { n_alloc = result->n_alloc_icol; } 91b2f1ab58SBarry Smith 92c328eeadSBarry Smith /* Motivate dimension choice */ 939566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, " Allocating %" PetscInt_FMT " nonzeros: ", n_alloc)); 942205254eSKarl Rupp if (n_alloc_max == n_alloc_est) { 959566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "this is the correct size\n")); 962205254eSKarl Rupp } else if (n_alloc_now >= n_alloc_est) { 979566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "the current size, which seems enough\n")); 982205254eSKarl Rupp } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) { 999566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "the maximum estimate\n")); 1002205254eSKarl Rupp } else { 1019566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc)); 1022205254eSKarl Rupp } 103b2f1ab58SBarry Smith 104c328eeadSBarry Smith /********************************************************** 105c328eeadSBarry Smith 2. Rescue arrays which would be lost 106c328eeadSBarry Smith Count how many rows and nonzeros will have to be rescued 107c328eeadSBarry Smith when moving all arrays in place */ 108c599c493SJunchao Zhang n_rescue = 0; 1092205254eSKarl Rupp if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 1109ccc4a9bSBarry Smith else { 111b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 1122205254eSKarl Rupp 1139ccc4a9bSBarry Smith *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 114b2f1ab58SBarry Smith } 115b2f1ab58SBarry Smith 1169ccc4a9bSBarry Smith for (i = *n_row_alloc_ok; i < nrows; i++) { 117b2f1ab58SBarry Smith i_here = result->icols[i] - result->alloc_icol; 118b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 1199ccc4a9bSBarry Smith if (result->row_nnz[i] > 0) { 120*9371c9d4SSatish Balay if (*n_alloc_used > i_here || i_last > n_alloc) { n_rescue += result->row_nnz[i]; } 121b2f1ab58SBarry Smith 1222205254eSKarl Rupp if (i < i_row) *n_alloc_used += result->row_nnz[i]; 1232205254eSKarl Rupp else *n_alloc_used += max_row_nnz[i]; 124b2f1ab58SBarry Smith } 125b2f1ab58SBarry Smith } 126b2f1ab58SBarry Smith 127c328eeadSBarry Smith /* Allocate rescue arrays */ 1289566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_rescue, &icol_rescue)); 1299566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_rescue, &val_rescue)); 130b2f1ab58SBarry Smith 131c328eeadSBarry Smith /* Rescue the arrays which need rescuing */ 132c599c493SJunchao Zhang n_rescue = 0; 1332205254eSKarl Rupp if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 1349ccc4a9bSBarry Smith else { 135b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 1369ccc4a9bSBarry Smith *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 137b2f1ab58SBarry Smith } 138b2f1ab58SBarry Smith 1399ccc4a9bSBarry Smith for (i = *n_row_alloc_ok; i < nrows; i++) { 140b2f1ab58SBarry Smith i_here = result->icols[i] - result->alloc_icol; 141b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 1429ccc4a9bSBarry Smith if (result->row_nnz[i] > 0) { 1434e1ff37aSBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) { 1449566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i])); 1459566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i])); 146b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 147b2f1ab58SBarry Smith } 148b2f1ab58SBarry Smith 1492205254eSKarl Rupp if (i < i_row) *n_alloc_used += result->row_nnz[i]; 1502205254eSKarl Rupp else *n_alloc_used += max_row_nnz[i]; 151b2f1ab58SBarry Smith } 152b2f1ab58SBarry Smith } 153b2f1ab58SBarry Smith 154c328eeadSBarry Smith /********************************************************** 155c328eeadSBarry Smith 3. Reallocate and correct administration */ 156b2f1ab58SBarry Smith 1579ccc4a9bSBarry Smith if (n_alloc != result->n_alloc_icol) { 158cc6fc633SBarry Smith n_copy = PetscMin(n_alloc, result->n_alloc_icol); 159b2f1ab58SBarry Smith 160c328eeadSBarry Smith /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 161b2f1ab58SBarry Smith 162c328eeadSBarry Smith Allocate new icol-data, copy old contents */ 1639566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol)); 1649566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy)); 165b2f1ab58SBarry Smith 166c328eeadSBarry Smith /* Update administration, Reset pointers to new arrays */ 167b2f1ab58SBarry Smith result->n_alloc_icol = n_alloc; 1689ccc4a9bSBarry Smith for (i = 0; i < nrows; i++) { 1699ccc4a9bSBarry Smith result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 1709ccc4a9bSBarry Smith result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 171b2f1ab58SBarry Smith } 172b2f1ab58SBarry Smith 173c328eeadSBarry Smith /* Delete old array */ 1749566063dSJacob Faibussowitsch PetscCall(PetscFree(alloc_icol_old)); 175b2f1ab58SBarry Smith 176c328eeadSBarry Smith /* Allocate new value-data, copy old contents */ 1779566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(n_alloc, &result->alloc_val)); 1789566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy)); 179b2f1ab58SBarry Smith 180c328eeadSBarry Smith /* Update administration, Reset pointers to new arrays */ 181b2f1ab58SBarry Smith result->n_alloc_val = n_alloc; 182*9371c9d4SSatish Balay for (i = 0; i < nrows; i++) { result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); } 183b2f1ab58SBarry Smith 184c328eeadSBarry Smith /* Delete old array */ 1859566063dSJacob Faibussowitsch PetscCall(PetscFree(alloc_val_old)); 186b2f1ab58SBarry Smith } 187b2f1ab58SBarry Smith 188c328eeadSBarry Smith /********************************************************* 189c328eeadSBarry Smith 4. Copy all the arrays to their proper places */ 190c599c493SJunchao Zhang n_rescue = 0; 1912205254eSKarl Rupp if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 1929ccc4a9bSBarry Smith else { 193b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 1942205254eSKarl Rupp 1959ccc4a9bSBarry Smith *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 196b2f1ab58SBarry Smith } 197b2f1ab58SBarry Smith 1989ccc4a9bSBarry Smith for (i = *n_row_alloc_ok; i < nrows; i++) { 199b2f1ab58SBarry Smith i_here = result->icols[i] - result->alloc_icol; 200b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 201b2f1ab58SBarry Smith 202b2f1ab58SBarry Smith result->icols[i] = result->alloc_icol + *n_alloc_used; 203b2f1ab58SBarry Smith result->values[i] = result->alloc_val + *n_alloc_used; 204b2f1ab58SBarry Smith 2059ccc4a9bSBarry Smith if (result->row_nnz[i] > 0) { 2069ccc4a9bSBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) { 2079566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i])); 2089566063dSJacob Faibussowitsch PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i])); 2092205254eSKarl Rupp 210b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 2119ccc4a9bSBarry Smith } else { 2129ccc4a9bSBarry Smith for (j = 0; j < result->row_nnz[i]; j++) { 213b2f1ab58SBarry Smith result->icols[i][j] = result->alloc_icol[i_here + j]; 214b2f1ab58SBarry Smith result->values[i][j] = result->alloc_val[i_here + j]; 215b2f1ab58SBarry Smith } 216b2f1ab58SBarry Smith } 2172205254eSKarl Rupp if (i < i_row) *n_alloc_used += result->row_nnz[i]; 2182205254eSKarl Rupp else *n_alloc_used += max_row_nnz[i]; 219b2f1ab58SBarry Smith } 220b2f1ab58SBarry Smith } 221b2f1ab58SBarry Smith 222c328eeadSBarry Smith /* Delete the rescue arrays */ 2239566063dSJacob Faibussowitsch PetscCall(PetscFree(icol_rescue)); 2249566063dSJacob Faibussowitsch PetscCall(PetscFree(val_rescue)); 2252205254eSKarl Rupp 226b2f1ab58SBarry Smith *n_row_alloc_ok = i_row; 227b2f1ab58SBarry Smith PetscFunctionReturn(0); 228b2f1ab58SBarry Smith } 229b2f1ab58SBarry Smith 230b2f1ab58SBarry Smith /* 231b2f1ab58SBarry Smith spbas_incomplete_cholesky: 232b2f1ab58SBarry Smith incomplete Cholesky decomposition of a square, symmetric, 233b2f1ab58SBarry Smith positive definite matrix. 234b2f1ab58SBarry Smith 235b2f1ab58SBarry Smith In case negative diagonals are encountered, function returns 236b2f1ab58SBarry Smith NEGATIVE_DIAGONAL. When this happens, call this function again 237b2f1ab58SBarry Smith with a larger epsdiag_in, a less sparse pattern, and/or a smaller 238b2f1ab58SBarry Smith droptol 239b2f1ab58SBarry Smith */ 240*9371c9d4SSatish Balay PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success) { 241b2f1ab58SBarry Smith PetscInt jL; 242b2f1ab58SBarry Smith Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 243b2f1ab58SBarry Smith PetscInt *ai = a->i, *aj = a->j; 244b2f1ab58SBarry Smith MatScalar *aa = a->a; 245b2f1ab58SBarry Smith PetscInt nrows, ncols; 246b2f1ab58SBarry Smith PetscInt *max_row_nnz; 247b2f1ab58SBarry Smith spbas_matrix retval; 248b2f1ab58SBarry Smith PetscScalar *diag; 249b2f1ab58SBarry Smith PetscScalar *val; 250b2f1ab58SBarry Smith PetscScalar *lvec; 251b2f1ab58SBarry Smith PetscScalar epsdiag; 252b2f1ab58SBarry Smith PetscInt i, j, k; 253ace3abfcSBarry Smith const PetscBool do_values = PETSC_TRUE; 2549ccc4a9bSBarry Smith PetscInt *r1_icol; 2559ccc4a9bSBarry Smith PetscScalar *r1_val; 2569ccc4a9bSBarry Smith PetscInt *r_icol; 2579ccc4a9bSBarry Smith PetscInt r_nnz; 2589ccc4a9bSBarry Smith PetscScalar *r_val; 2599ccc4a9bSBarry Smith PetscInt *A_icol; 2609ccc4a9bSBarry Smith PetscInt A_nnz; 2619ccc4a9bSBarry Smith PetscScalar *A_val; 2629ccc4a9bSBarry Smith PetscInt *p_icol; 2639ccc4a9bSBarry Smith PetscInt p_nnz; 2649ccc4a9bSBarry Smith PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 2659ccc4a9bSBarry Smith PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 266b2f1ab58SBarry Smith 2679ccc4a9bSBarry Smith PetscFunctionBegin; 2684e1ff37aSBarry Smith /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 2699566063dSJacob Faibussowitsch PetscCall(MatGetSize(A, &nrows, &ncols)); 2709566063dSJacob Faibussowitsch PetscCall(MatGetTrace(A, &epsdiag)); 2712205254eSKarl Rupp 272b2f1ab58SBarry Smith epsdiag *= epsdiag_in / nrows; 2732205254eSKarl Rupp 2749566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, " Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol)); 275b2f1ab58SBarry Smith 2762205254eSKarl Rupp if (droptol < 1e-10) droptol = 1e-10; 277b2f1ab58SBarry Smith 278b2f1ab58SBarry Smith retval.nrows = nrows; 279b2f1ab58SBarry Smith retval.ncols = nrows; 280b2f1ab58SBarry Smith retval.nnz = pattern.nnz / 10; 281b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 282b2f1ab58SBarry Smith retval.block_data = PETSC_TRUE; 283b2f1ab58SBarry Smith 2849566063dSJacob Faibussowitsch PetscCall(spbas_allocate_pattern(&retval, do_values)); 2859566063dSJacob Faibussowitsch PetscCall(PetscArrayzero(retval.row_nnz, nrows)); 2869566063dSJacob Faibussowitsch PetscCall(spbas_allocate_data(&retval)); 287b2f1ab58SBarry Smith retval.nnz = 0; 288b2f1ab58SBarry Smith 2899566063dSJacob Faibussowitsch PetscCall(PetscMalloc1(nrows, &diag)); 2909566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(nrows, &val)); 2919566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(nrows, &lvec)); 2929566063dSJacob Faibussowitsch PetscCall(PetscCalloc1(nrows, &max_row_nnz)); 293b2f1ab58SBarry Smith 294c328eeadSBarry Smith /* Count the nonzeros on transpose of pattern */ 2954e1ff37aSBarry Smith for (i = 0; i < nrows; i++) { 296b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 297b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 298*9371c9d4SSatish Balay for (j = 0; j < p_nnz; j++) { max_row_nnz[i + p_icol[j]]++; } 299b2f1ab58SBarry Smith } 300b2f1ab58SBarry Smith 301c328eeadSBarry Smith /* Calculate rows of L */ 3024e1ff37aSBarry Smith for (i = 0; i < nrows; i++) { 303b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 304b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 305b2f1ab58SBarry Smith 306b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 307b2f1ab58SBarry Smith r_icol = retval.icols[i]; 308b2f1ab58SBarry Smith r_val = retval.values[i]; 309b2f1ab58SBarry Smith A_nnz = ai[rip[i] + 1] - ai[rip[i]]; 310b2f1ab58SBarry Smith A_icol = &aj[ai[rip[i]]]; 311b2f1ab58SBarry Smith A_val = &aa[ai[rip[i]]]; 312b2f1ab58SBarry Smith 313c328eeadSBarry Smith /* Calculate val += A(i,i:n)'; */ 3144e1ff37aSBarry Smith for (j = 0; j < A_nnz; j++) { 315b2f1ab58SBarry Smith k = riip[A_icol[j]]; 3162205254eSKarl Rupp if (k >= i) val[k] = A_val[j]; 317b2f1ab58SBarry Smith } 318b2f1ab58SBarry Smith 319c328eeadSBarry Smith /* Add regularization */ 320b2f1ab58SBarry Smith val[i] += epsdiag; 321b2f1ab58SBarry Smith 322c328eeadSBarry Smith /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 323c328eeadSBarry Smith val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 3244e1ff37aSBarry Smith for (j = 0; j < r_nnz; j++) { 325b2f1ab58SBarry Smith k = r_icol[j]; 326b2f1ab58SBarry Smith lvec[k] = diag[k] * r_val[j]; 327b2f1ab58SBarry Smith val[i] -= r_val[j] * lvec[k]; 328b2f1ab58SBarry Smith } 329b2f1ab58SBarry Smith 330c328eeadSBarry Smith /* Calculate the new diagonal */ 331b2f1ab58SBarry Smith diag[i] = val[i]; 3324e1ff37aSBarry Smith if (PetscRealPart(diag[i]) < droptol) { 3339566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n")); 3349566063dSJacob Faibussowitsch PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1)); 335b2f1ab58SBarry Smith 336c328eeadSBarry Smith /* Delete the whole matrix at once. */ 3379566063dSJacob Faibussowitsch PetscCall(spbas_delete(retval)); 338cc442ecdSBarry Smith *success = PETSC_FALSE; 339cc442ecdSBarry Smith PetscFunctionReturn(0); 340b2f1ab58SBarry Smith } 341b2f1ab58SBarry Smith 342c328eeadSBarry Smith /* If necessary, allocate arrays */ 3434e1ff37aSBarry Smith if (r_nnz == 0) { 344cc442ecdSBarry Smith PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 34528b400f6SJacob Faibussowitsch PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed"); 346b2f1ab58SBarry Smith r_icol = retval.icols[i]; 347b2f1ab58SBarry Smith r_val = retval.values[i]; 348b2f1ab58SBarry Smith } 349b2f1ab58SBarry Smith 350c328eeadSBarry Smith /* Now, fill in */ 351b2f1ab58SBarry Smith r_icol[r_nnz] = i; 352b2f1ab58SBarry Smith r_val[r_nnz] = 1.0; 353b2f1ab58SBarry Smith r_nnz++; 354b2f1ab58SBarry Smith retval.row_nnz[i]++; 355b2f1ab58SBarry Smith 356b2f1ab58SBarry Smith retval.nnz += r_nnz; 357b2f1ab58SBarry Smith 358c328eeadSBarry Smith /* Calculate 359c328eeadSBarry Smith val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 3604e1ff37aSBarry Smith for (j = 1; j < p_nnz; j++) { 361b2f1ab58SBarry Smith k = i + p_icol[j]; 362b2f1ab58SBarry Smith r1_icol = retval.icols[k]; 363b2f1ab58SBarry Smith r1_val = retval.values[k]; 364*9371c9d4SSatish Balay for (jL = 0; jL < retval.row_nnz[k]; jL++) { val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; } 365b2f1ab58SBarry Smith val[k] /= diag[i]; 366b2f1ab58SBarry Smith 3674e1ff37aSBarry Smith if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) { 368c328eeadSBarry Smith /* If necessary, allocate arrays */ 369cc442ecdSBarry Smith if (!retval.row_nnz[k]) { 370cc442ecdSBarry Smith PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 371cc442ecdSBarry Smith if (!success) { 3729566063dSJacob Faibussowitsch PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 373cc442ecdSBarry Smith flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 37428b400f6SJacob Faibussowitsch PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed"); 375b2f1ab58SBarry Smith r_icol = retval.icols[i]; 376cc442ecdSBarry Smith } 377b2f1ab58SBarry Smith } 378b2f1ab58SBarry Smith 379b2f1ab58SBarry Smith retval.icols[k][retval.row_nnz[k]] = i; 380b2f1ab58SBarry Smith retval.values[k][retval.row_nnz[k]] = val[k]; 381b2f1ab58SBarry Smith retval.row_nnz[k]++; 382b2f1ab58SBarry Smith } 383b2f1ab58SBarry Smith val[k] = 0; 384b2f1ab58SBarry Smith } 385b2f1ab58SBarry Smith 38671d55d18SBarry Smith /* Erase the values used in the work arrays */ 3872205254eSKarl Rupp for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0; 388b2f1ab58SBarry Smith } 389b2f1ab58SBarry Smith 3909566063dSJacob Faibussowitsch PetscCall(PetscFree(lvec)); 3919566063dSJacob Faibussowitsch PetscCall(PetscFree(val)); 392b2f1ab58SBarry Smith 3939566063dSJacob Faibussowitsch PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 3949566063dSJacob Faibussowitsch PetscCall(PetscFree(max_row_nnz)); 395b2f1ab58SBarry Smith 396c328eeadSBarry Smith /* Place the inverse of the diagonals in the matrix */ 3979ccc4a9bSBarry Smith for (i = 0; i < nrows; i++) { 398b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 3992205254eSKarl Rupp 400b2f1ab58SBarry Smith retval.values[i][r_nnz - 1] = 1.0 / diag[i]; 401*9371c9d4SSatish Balay for (j = 0; j < r_nnz - 1; j++) { retval.values[i][j] *= -1; } 402b2f1ab58SBarry Smith } 4039566063dSJacob Faibussowitsch PetscCall(PetscFree(diag)); 404b2f1ab58SBarry Smith *matrix_L = retval; 405cc442ecdSBarry Smith *success = PETSC_TRUE; 406b2f1ab58SBarry Smith PetscFunctionReturn(0); 407b2f1ab58SBarry Smith } 408