1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 9 { 10 PetscFunctionBegin; 11 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 12 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 13 *n_alloc_used += r_nnz; 14 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_FALSE); 15 PetscFunctionReturn(PETSC_TRUE); 16 } 17 18 /* 19 spbas_cholesky_garbage_collect: 20 move the rows which have been calculated so far, as well as 21 those currently under construction, to the front of the 22 array, while putting them in the proper order. 23 When it seems necessary, enlarge the current arrays. 24 25 NB: re-allocation is being simulated using 26 PetscMalloc, memcpy, PetscFree, because 27 PetscRealloc does not seem to exist. 28 29 */ 30 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 31 Only the storage, not the contents of this matrix is changed in this function */ 32 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 33 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 34 places in the arrays: they need not be moved any more */ 35 PetscInt *n_alloc_used, /* I/O: */ 36 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 37 { 38 /* PSEUDO-CODE: 39 1. Choose the appropriate size for the arrays 40 2. Rescue the arrays which would be lost during garbage collection 41 3. Reallocate and correct administration 42 4. Move all arrays so that they are in proper order */ 43 44 PetscInt i,j; 45 PetscInt nrows = result->nrows; 46 PetscInt n_alloc_ok =0; 47 PetscInt n_alloc_ok_max=0; 48 PetscInt need_already = 0; 49 PetscInt n_rows_ahead =0; 50 PetscInt max_need_extra= 0; 51 PetscInt n_alloc_max, n_alloc_est, n_alloc; 52 PetscInt n_alloc_now = result->n_alloc_icol; 53 PetscInt *alloc_icol_old = result->alloc_icol; 54 PetscScalar *alloc_val_old = result->alloc_val; 55 PetscInt *icol_rescue; 56 PetscScalar *val_rescue; 57 PetscInt n_rescue; 58 PetscInt n_row_rescue; 59 PetscInt i_here, i_last, n_copy; 60 const PetscReal xtra_perc = 20; 61 62 PetscFunctionBegin; 63 /********************************************************* 64 1. Choose appropriate array size 65 Count number of rows and memory usage which is already final */ 66 for (i=0; i<i_row; i++) { 67 n_alloc_ok += result->row_nnz[i]; 68 n_alloc_ok_max += max_row_nnz[i]; 69 } 70 71 /* Count currently needed memory usage and future memory requirements 72 (max, predicted)*/ 73 for (i=i_row; i<nrows; i++) { 74 if (!result->row_nnz[i]) { 75 max_need_extra += max_row_nnz[i]; 76 } else { 77 need_already += max_row_nnz[i]; 78 n_rows_ahead++; 79 } 80 } 81 82 /* Make maximal and realistic memory requirement estimates */ 83 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 84 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 85 86 /* Choose array sizes */ 87 if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 88 else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 89 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max; 90 else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); 91 92 /* If new estimate is less than what we already have, 93 don't reallocate, just garbage-collect */ 94 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 95 n_alloc = result->n_alloc_icol; 96 } 97 98 /* Motivate dimension choice */ 99 PetscCall(PetscInfo(NULL," Allocating %" PetscInt_FMT " nonzeros: ",n_alloc)); 100 if (n_alloc_max == n_alloc_est) { 101 PetscCall(PetscInfo(NULL,"this is the correct size\n")); 102 } else if (n_alloc_now >= n_alloc_est) { 103 PetscCall(PetscInfo(NULL,"the current size, which seems enough\n")); 104 } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { 105 PetscCall(PetscInfo(NULL,"the maximum estimate\n")); 106 } else { 107 PetscCall(PetscInfo(NULL,"%6.2f %% more than the estimate\n",(double)xtra_perc)); 108 } 109 110 /********************************************************** 111 2. Rescue arrays which would be lost 112 Count how many rows and nonzeros will have to be rescued 113 when moving all arrays in place */ 114 n_row_rescue = 0; n_rescue = 0; 115 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 116 else { 117 i = *n_row_alloc_ok - 1; 118 119 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 120 } 121 122 for (i=*n_row_alloc_ok; i<nrows; i++) { 123 i_here = result->icols[i]-result->alloc_icol; 124 i_last = i_here + result->row_nnz[i]; 125 if (result->row_nnz[i]>0) { 126 if (*n_alloc_used > i_here || i_last > n_alloc) { 127 n_rescue += result->row_nnz[i]; 128 n_row_rescue++; 129 } 130 131 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 132 else *n_alloc_used += max_row_nnz[i]; 133 } 134 } 135 136 /* Allocate rescue arrays */ 137 PetscCall(PetscMalloc1(n_rescue, &icol_rescue)); 138 PetscCall(PetscMalloc1(n_rescue, &val_rescue)); 139 140 /* Rescue the arrays which need rescuing */ 141 n_row_rescue = 0; n_rescue = 0; 142 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 143 else { 144 i = *n_row_alloc_ok - 1; 145 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 146 } 147 148 for (i=*n_row_alloc_ok; i<nrows; i++) { 149 i_here = result->icols[i]-result->alloc_icol; 150 i_last = i_here + result->row_nnz[i]; 151 if (result->row_nnz[i]>0) { 152 if (*n_alloc_used > i_here || i_last > n_alloc) { 153 PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i])); 154 PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i])); 155 n_rescue += result->row_nnz[i]; 156 n_row_rescue++; 157 } 158 159 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 160 else *n_alloc_used += max_row_nnz[i]; 161 } 162 } 163 164 /********************************************************** 165 3. Reallocate and correct administration */ 166 167 if (n_alloc != result->n_alloc_icol) { 168 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 169 170 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 171 172 Allocate new icol-data, copy old contents */ 173 PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol)); 174 PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy)); 175 176 /* Update administration, Reset pointers to new arrays */ 177 result->n_alloc_icol = n_alloc; 178 for (i=0; i<nrows; i++) { 179 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 180 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 181 } 182 183 /* Delete old array */ 184 PetscCall(PetscFree(alloc_icol_old)); 185 186 /* Allocate new value-data, copy old contents */ 187 PetscCall(PetscMalloc1(n_alloc, &result->alloc_val)); 188 PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy)); 189 190 /* Update administration, Reset pointers to new arrays */ 191 result->n_alloc_val = n_alloc; 192 for (i=0; i<nrows; i++) { 193 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 194 } 195 196 /* Delete old array */ 197 PetscCall(PetscFree(alloc_val_old)); 198 } 199 200 /********************************************************* 201 4. Copy all the arrays to their proper places */ 202 n_row_rescue = 0; n_rescue = 0; 203 if (*n_row_alloc_ok==0) *n_alloc_used = 0; 204 else { 205 i = *n_row_alloc_ok - 1; 206 207 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 208 } 209 210 for (i=*n_row_alloc_ok; i<nrows; i++) { 211 i_here = result->icols[i]-result->alloc_icol; 212 i_last = i_here + result->row_nnz[i]; 213 214 result->icols[i] = result->alloc_icol + *n_alloc_used; 215 result->values[i]= result->alloc_val + *n_alloc_used; 216 217 if (result->row_nnz[i]>0) { 218 if (*n_alloc_used > i_here || i_last > n_alloc) { 219 PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i])); 220 PetscCall(PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i])); 221 222 n_rescue += result->row_nnz[i]; 223 n_row_rescue++; 224 } else { 225 for (j=0; j<result->row_nnz[i]; j++) { 226 result->icols[i][j] = result->alloc_icol[i_here+j]; 227 result->values[i][j] = result->alloc_val[i_here+j]; 228 } 229 } 230 if (i<i_row) *n_alloc_used += result->row_nnz[i]; 231 else *n_alloc_used += max_row_nnz[i]; 232 } 233 } 234 235 /* Delete the rescue arrays */ 236 PetscCall(PetscFree(icol_rescue)); 237 PetscCall(PetscFree(val_rescue)); 238 239 *n_row_alloc_ok = i_row; 240 PetscFunctionReturn(0); 241 } 242 243 /* 244 spbas_incomplete_cholesky: 245 incomplete Cholesky decomposition of a square, symmetric, 246 positive definite matrix. 247 248 In case negative diagonals are encountered, function returns 249 NEGATIVE_DIAGONAL. When this happens, call this function again 250 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 251 droptol 252 */ 253 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L,PetscBool *success) 254 { 255 PetscInt jL; 256 Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 257 PetscInt *ai=a->i,*aj=a->j; 258 MatScalar *aa=a->a; 259 PetscInt nrows, ncols; 260 PetscInt *max_row_nnz; 261 spbas_matrix retval; 262 PetscScalar *diag; 263 PetscScalar *val; 264 PetscScalar *lvec; 265 PetscScalar epsdiag; 266 PetscInt i,j,k; 267 const PetscBool do_values = PETSC_TRUE; 268 PetscInt *r1_icol; 269 PetscScalar *r1_val; 270 PetscInt *r_icol; 271 PetscInt r_nnz; 272 PetscScalar *r_val; 273 PetscInt *A_icol; 274 PetscInt A_nnz; 275 PetscScalar *A_val; 276 PetscInt *p_icol; 277 PetscInt p_nnz; 278 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 279 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 280 281 PetscFunctionBegin; 282 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 283 PetscCall(MatGetSize(A, &nrows, &ncols)); 284 PetscCall(MatGetTrace(A, &epsdiag)); 285 286 epsdiag *= epsdiag_in / nrows; 287 288 PetscCall(PetscInfo(NULL," Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol)); 289 290 if (droptol<1e-10) droptol=1e-10; 291 292 retval.nrows = nrows; 293 retval.ncols = nrows; 294 retval.nnz = pattern.nnz/10; 295 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 296 retval.block_data = PETSC_TRUE; 297 298 PetscCall(spbas_allocate_pattern(&retval, do_values)); 299 PetscCall(PetscArrayzero(retval.row_nnz, nrows)); 300 PetscCall(spbas_allocate_data(&retval)); 301 retval.nnz = 0; 302 303 PetscCall(PetscMalloc1(nrows, &diag)); 304 PetscCall(PetscCalloc1(nrows, &val)); 305 PetscCall(PetscCalloc1(nrows, &lvec)); 306 PetscCall(PetscCalloc1(nrows, &max_row_nnz)); 307 308 /* Count the nonzeros on transpose of pattern */ 309 for (i = 0; i<nrows; i++) { 310 p_nnz = pattern.row_nnz[i]; 311 p_icol = pattern.icols[i]; 312 for (j=0; j<p_nnz; j++) { 313 max_row_nnz[i+p_icol[j]]++; 314 } 315 } 316 317 /* Calculate rows of L */ 318 for (i = 0; i<nrows; i++) { 319 p_nnz = pattern.row_nnz[i]; 320 p_icol = pattern.icols[i]; 321 322 r_nnz = retval.row_nnz[i]; 323 r_icol = retval.icols[i]; 324 r_val = retval.values[i]; 325 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 326 A_icol = &aj[ai[rip[i]]]; 327 A_val = &aa[ai[rip[i]]]; 328 329 /* Calculate val += A(i,i:n)'; */ 330 for (j=0; j<A_nnz; j++) { 331 k = riip[A_icol[j]]; 332 if (k>=i) val[k] = A_val[j]; 333 } 334 335 /* Add regularization */ 336 val[i] += epsdiag; 337 338 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 339 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 340 for (j=0; j<r_nnz; j++) { 341 k = r_icol[j]; 342 lvec[k] = diag[k] * r_val[j]; 343 val[i] -= r_val[j] * lvec[k]; 344 } 345 346 /* Calculate the new diagonal */ 347 diag[i] = val[i]; 348 if (PetscRealPart(diag[i])<droptol) { 349 PetscCall(PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n")); 350 PetscCall(PetscInfo(NULL,"Negative diagonal in row %" PetscInt_FMT "\n",i+1)); 351 352 /* Delete the whole matrix at once. */ 353 PetscCall(spbas_delete(retval)); 354 *success = PETSC_FALSE; 355 PetscFunctionReturn(0); 356 } 357 358 /* If necessary, allocate arrays */ 359 if (r_nnz==0) { 360 PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 361 PetscCheck(success,PETSC_COMM_SELF,PETSC_ERR_MEM,"spbas_cholesky_row_alloc() failed"); 362 r_icol = retval.icols[i]; 363 r_val = retval.values[i]; 364 } 365 366 /* Now, fill in */ 367 r_icol[r_nnz] = i; 368 r_val [r_nnz] = 1.0; 369 r_nnz++; 370 retval.row_nnz[i]++; 371 372 retval.nnz += r_nnz; 373 374 /* Calculate 375 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 376 for (j=1; j<p_nnz; j++) { 377 k = i+p_icol[j]; 378 r1_icol = retval.icols[k]; 379 r1_val = retval.values[k]; 380 for (jL=0; jL<retval.row_nnz[k]; jL++) { 381 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 382 } 383 val[k] /= diag[i]; 384 385 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 386 /* If necessary, allocate arrays */ 387 if (!retval.row_nnz[k]) { 388 PetscBool flag,success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 389 if (!success) { 390 PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 391 flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 392 PetscCheck(flag,PETSC_COMM_SELF,PETSC_ERR_MEM,"Allocation in spbas_cholesky_row_alloc() failed"); 393 r_icol = retval.icols[i]; 394 } 395 } 396 397 retval.icols[k][retval.row_nnz[k]] = i; 398 retval.values[k][retval.row_nnz[k]] = val[k]; 399 retval.row_nnz[k]++; 400 } 401 val[k] = 0; 402 } 403 404 /* Erase the values used in the work arrays */ 405 for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0; 406 } 407 408 PetscCall(PetscFree(lvec)); 409 PetscCall(PetscFree(val)); 410 411 PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz)); 412 PetscCall(PetscFree(max_row_nnz)); 413 414 /* Place the inverse of the diagonals in the matrix */ 415 for (i=0; i<nrows; i++) { 416 r_nnz = retval.row_nnz[i]; 417 418 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 419 for (j=0; j<r_nnz-1; j++) { 420 retval.values[i][j] *= -1; 421 } 422 } 423 PetscCall(PetscFree(diag)); 424 *matrix_L = retval; 425 *success = PETSC_TRUE; 426 PetscFunctionReturn(0); 427 } 428