1 #ifndef SPBAS_CHOLESKY_H 2 #define SPBAS_CHOLESKY_H 3 /* 4 spbas_cholesky_row_alloc: 5 in the data arrays, find a place where another row may be stored. 6 Return PETSC_ERR_MEM if there is insufficient space to store the 7 row, so garbage collection and/or re-allocation may be done. 8 */ 9 static PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used) 10 { 11 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 12 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 13 *n_alloc_used += r_nnz; 14 return (*n_alloc_used > retval.n_alloc_icol) ? PETSC_FALSE : PETSC_TRUE; 15 } 16 17 /* 18 spbas_cholesky_garbage_collect: 19 move the rows which have been calculated so far, as well as 20 those currently under construction, to the front of the 21 array, while putting them in the proper order. 22 When it seems necessary, enlarge the current arrays. 23 24 NB: re-allocation is being simulated using 25 PetscMalloc, memcpy, PetscFree, because 26 PetscRealloc does not seem to exist. 27 28 */ 29 static PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 30 Only the storage, not the contents of this matrix is changed in this function */ 31 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 32 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 33 places in the arrays: they need not be moved any more */ 34 PetscInt *n_alloc_used, /* I/O: */ 35 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 36 { 37 /* PSEUDO-CODE: 38 1. Choose the appropriate size for the arrays 39 2. Rescue the arrays which would be lost during garbage collection 40 3. Reallocate and correct administration 41 4. Move all arrays so that they are in proper order */ 42 43 PetscInt i, j; 44 PetscInt nrows = result->nrows; 45 PetscInt n_alloc_ok = 0; 46 PetscInt n_alloc_ok_max = 0; 47 PetscInt need_already = 0; 48 PetscInt max_need_extra = 0; 49 PetscInt n_alloc_max, n_alloc_est, n_alloc; 50 PetscInt n_alloc_now = result->n_alloc_icol; 51 PetscInt *alloc_icol_old = result->alloc_icol; 52 PetscScalar *alloc_val_old = result->alloc_val; 53 PetscInt *icol_rescue; 54 PetscScalar *val_rescue; 55 PetscInt n_rescue; 56 PetscInt i_here, i_last, n_copy; 57 const PetscReal xtra_perc = 20; 58 59 PetscFunctionBegin; 60 /********************************************************* 61 1. Choose appropriate array size 62 Count number of rows and memory usage which is already final */ 63 for (i = 0; i < i_row; i++) { 64 n_alloc_ok += result->row_nnz[i]; 65 n_alloc_ok_max += max_row_nnz[i]; 66 } 67 68 /* Count currently needed memory usage and future memory requirements 69 (max, predicted)*/ 70 for (i = i_row; i < nrows; i++) { 71 if (!result->row_nnz[i]) { 72 max_need_extra += max_row_nnz[i]; 73 } else { 74 need_already += max_row_nnz[i]; 75 } 76 } 77 78 /* Make maximal and realistic memory requirement estimates */ 79 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 80 n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max)); 81 82 /* Choose array sizes */ 83 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 84 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 85 else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max; 86 else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0)); 87 88 /* If new estimate is less than what we already have, 89 don't reallocate, just garbage-collect */ 90 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) n_alloc = result->n_alloc_icol; 91 92 /* Motivate dimension choice */ 93 PetscCall(PetscInfo(NULL, " Allocating %" PetscInt_FMT " nonzeros: ", n_alloc)); /* checkbadSource \n */ 94 if (n_alloc_max == n_alloc_est) { 95 PetscCall(PetscInfo(NULL, "this is the correct size\n")); 96 } else if (n_alloc_now >= n_alloc_est) { 97 PetscCall(PetscInfo(NULL, "the current size, which seems enough\n")); 98 } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) { 99 PetscCall(PetscInfo(NULL, "the maximum estimate\n")); 100 } else { 101 PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc)); 102 } 103 104 /********************************************************** 105 2. Rescue arrays which would be lost 106 Count how many rows and nonzeros will have to be rescued 107 when moving all arrays in place */ 108 n_rescue = 0; 109 if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 110 else { 111 i = *n_row_alloc_ok - 1; 112 113 *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 114 } 115 116 for (i = *n_row_alloc_ok; i < nrows; i++) { 117 i_here = result->icols[i] - result->alloc_icol; 118 i_last = i_here + result->row_nnz[i]; 119 if (result->row_nnz[i] > 0) { 120 if (*n_alloc_used > i_here || i_last > n_alloc) n_rescue += result->row_nnz[i]; 121 122 if (i < i_row) *n_alloc_used += result->row_nnz[i]; 123 else *n_alloc_used += max_row_nnz[i]; 124 } 125 } 126 127 /* Allocate rescue arrays */ 128 PetscCall(PetscMalloc1(n_rescue, &icol_rescue)); 129 PetscCall(PetscMalloc1(n_rescue, &val_rescue)); 130 131 /* Rescue the arrays which need rescuing */ 132 n_rescue = 0; 133 if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 134 else { 135 i = *n_row_alloc_ok - 1; 136 *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 137 } 138 139 for (i = *n_row_alloc_ok; i < nrows; i++) { 140 i_here = result->icols[i] - result->alloc_icol; 141 i_last = i_here + result->row_nnz[i]; 142 if (result->row_nnz[i] > 0) { 143 if (*n_alloc_used > i_here || i_last > n_alloc) { 144 PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i])); 145 PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i])); 146 n_rescue += result->row_nnz[i]; 147 } 148 149 if (i < i_row) *n_alloc_used += result->row_nnz[i]; 150 else *n_alloc_used += max_row_nnz[i]; 151 } 152 } 153 154 /********************************************************** 155 3. Reallocate and correct administration */ 156 157 if (n_alloc != result->n_alloc_icol) { 158 n_copy = PetscMin(n_alloc, result->n_alloc_icol); 159 160 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 161 162 Allocate new icol-data, copy old contents */ 163 PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol)); 164 PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy)); 165 166 /* Update administration, Reset pointers to new arrays */ 167 result->n_alloc_icol = n_alloc; 168 for (i = 0; i < nrows; i++) { 169 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 170 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 171 } 172 173 /* Delete old array */ 174 PetscCall(PetscFree(alloc_icol_old)); 175 176 /* Allocate new value-data, copy old contents */ 177 PetscCall(PetscMalloc1(n_alloc, &result->alloc_val)); 178 PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy)); 179 180 /* Update administration, Reset pointers to new arrays */ 181 result->n_alloc_val = n_alloc; 182 for (i = 0; i < nrows; i++) result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 183 184 /* Delete old array */ 185 PetscCall(PetscFree(alloc_val_old)); 186 } 187 188 /********************************************************* 189 4. Copy all the arrays to their proper places */ 190 n_rescue = 0; 191 if (*n_row_alloc_ok == 0) *n_alloc_used = 0; 192 else { 193 i = *n_row_alloc_ok - 1; 194 195 *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i]; 196 } 197 198 for (i = *n_row_alloc_ok; i < nrows; i++) { 199 i_here = result->icols[i] - result->alloc_icol; 200 i_last = i_here + result->row_nnz[i]; 201 202 result->icols[i] = result->alloc_icol + *n_alloc_used; 203 result->values[i] = result->alloc_val + *n_alloc_used; 204 205 if (result->row_nnz[i] > 0) { 206 if (*n_alloc_used > i_here || i_last > n_alloc) { 207 PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i])); 208 PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i])); 209 210 n_rescue += result->row_nnz[i]; 211 } else { 212 for (j = 0; j < result->row_nnz[i]; j++) { 213 result->icols[i][j] = result->alloc_icol[i_here + j]; 214 result->values[i][j] = result->alloc_val[i_here + j]; 215 } 216 } 217 if (i < i_row) *n_alloc_used += result->row_nnz[i]; 218 else *n_alloc_used += max_row_nnz[i]; 219 } 220 } 221 222 /* Delete the rescue arrays */ 223 PetscCall(PetscFree(icol_rescue)); 224 PetscCall(PetscFree(val_rescue)); 225 226 *n_row_alloc_ok = i_row; 227 PetscFunctionReturn(PETSC_SUCCESS); 228 } 229 230 /* 231 spbas_incomplete_cholesky: 232 incomplete Cholesky decomposition of a square, symmetric, 233 positive definite matrix. 234 235 In case negative diagonals are encountered, function returns 236 NEGATIVE_DIAGONAL. When this happens, call this function again 237 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 238 droptol 239 */ 240 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success) 241 { 242 PetscInt jL; 243 Mat_SeqAIJ *a = (Mat_SeqAIJ *)A->data; 244 PetscInt *ai = a->i, *aj = a->j; 245 MatScalar *aa = a->a; 246 PetscInt nrows, ncols; 247 PetscInt *max_row_nnz; 248 spbas_matrix retval; 249 PetscScalar *diag; 250 PetscScalar *val; 251 PetscScalar *lvec; 252 PetscScalar epsdiag; 253 PetscInt i, j, k; 254 const PetscBool do_values = PETSC_TRUE; 255 PetscInt *r1_icol; 256 PetscScalar *r1_val; 257 PetscInt *r_icol; 258 PetscInt r_nnz; 259 PetscScalar *r_val; 260 PetscInt *A_icol; 261 PetscInt A_nnz; 262 PetscScalar *A_val; 263 PetscInt *p_icol; 264 PetscInt p_nnz; 265 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 266 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 267 268 PetscFunctionBegin; 269 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 270 PetscCall(MatGetSize(A, &nrows, &ncols)); 271 PetscCall(MatGetTrace(A, &epsdiag)); 272 273 epsdiag *= epsdiag_in / nrows; 274 275 PetscCall(PetscInfo(NULL, " Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol)); 276 277 if (droptol < 1e-10) droptol = 1e-10; 278 279 retval.nrows = nrows; 280 retval.ncols = nrows; 281 retval.nnz = pattern.nnz / 10; 282 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 283 retval.block_data = PETSC_TRUE; 284 285 PetscCall(spbas_allocate_pattern(&retval, do_values)); 286 PetscCall(PetscArrayzero(retval.row_nnz, nrows)); 287 PetscCall(spbas_allocate_data(&retval)); 288 retval.nnz = 0; 289 290 PetscCall(PetscMalloc1(nrows, &diag)); 291 PetscCall(PetscCalloc1(nrows, &val)); 292 PetscCall(PetscCalloc1(nrows, &lvec)); 293 PetscCall(PetscCalloc1(nrows, &max_row_nnz)); 294 295 /* Count the nonzeros on transpose of pattern */ 296 for (i = 0; i < nrows; i++) { 297 p_nnz = pattern.row_nnz[i]; 298 p_icol = pattern.icols[i]; 299 for (j = 0; j < p_nnz; j++) max_row_nnz[i + p_icol[j]]++; 300 } 301 302 /* Calculate rows of L */ 303 for (i = 0; i < nrows; i++) { 304 p_nnz = pattern.row_nnz[i]; 305 p_icol = pattern.icols[i]; 306 307 r_nnz = retval.row_nnz[i]; 308 r_icol = retval.icols[i]; 309 r_val = retval.values[i]; 310 A_nnz = ai[rip[i] + 1] - ai[rip[i]]; 311 A_icol = &aj[ai[rip[i]]]; 312 A_val = &aa[ai[rip[i]]]; 313 314 /* Calculate val += A(i,i:n)'; */ 315 for (j = 0; j < A_nnz; j++) { 316 k = riip[A_icol[j]]; 317 if (k >= i) val[k] = A_val[j]; 318 } 319 320 /* Add regularization */ 321 val[i] += epsdiag; 322 323 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 324 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 325 for (j = 0; j < r_nnz; j++) { 326 k = r_icol[j]; 327 lvec[k] = diag[k] * r_val[j]; 328 val[i] -= r_val[j] * lvec[k]; 329 } 330 331 /* Calculate the new diagonal */ 332 diag[i] = val[i]; 333 if (PetscRealPart(diag[i]) < droptol) { 334 PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n")); 335 PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1)); 336 337 /* Delete the whole matrix at once. */ 338 PetscCall(spbas_delete(retval)); 339 *success = PETSC_FALSE; 340 PetscFunctionReturn(PETSC_SUCCESS); 341 } 342 343 /* If necessary, allocate arrays */ 344 if (r_nnz == 0) { 345 PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 346 PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed"); 347 r_icol = retval.icols[i]; 348 r_val = retval.values[i]; 349 } 350 351 /* Now, fill in */ 352 r_icol[r_nnz] = i; 353 r_val[r_nnz] = 1.0; 354 r_nnz++; 355 retval.row_nnz[i]++; 356 357 retval.nnz += r_nnz; 358 359 /* Calculate 360 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 361 for (j = 1; j < p_nnz; j++) { 362 k = i + p_icol[j]; 363 r1_icol = retval.icols[k]; 364 r1_val = retval.values[k]; 365 for (jL = 0; jL < retval.row_nnz[k]; jL++) val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 366 val[k] /= diag[i]; 367 368 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) { 369 /* If necessary, allocate arrays */ 370 if (!retval.row_nnz[k]) { 371 PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 372 if (!success) { 373 PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 374 flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 375 PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed"); 376 r_icol = retval.icols[i]; 377 } 378 } 379 380 retval.icols[k][retval.row_nnz[k]] = i; 381 retval.values[k][retval.row_nnz[k]] = val[k]; 382 retval.row_nnz[k]++; 383 } 384 val[k] = 0; 385 } 386 387 /* Erase the values used in the work arrays */ 388 for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0; 389 } 390 391 PetscCall(PetscFree(lvec)); 392 PetscCall(PetscFree(val)); 393 394 PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 395 PetscCall(PetscFree(max_row_nnz)); 396 397 /* Place the inverse of the diagonals in the matrix */ 398 for (i = 0; i < nrows; i++) { 399 r_nnz = retval.row_nnz[i]; 400 401 retval.values[i][r_nnz - 1] = 1.0 / diag[i]; 402 for (j = 0; j < r_nnz - 1; j++) retval.values[i][j] *= -1; 403 } 404 PetscCall(PetscFree(diag)); 405 *matrix_L = retval; 406 *success = PETSC_TRUE; 407 PetscFunctionReturn(PETSC_SUCCESS); 408 } 409 410 #endif 411