1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "spbas_cholesky_row_alloc" 10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11 { 12 PetscFunctionBegin; 13 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15 *n_alloc_used += r_nnz; 16 if (*n_alloc_used > retval.n_alloc_icol) 17 { 18 PetscFunctionReturn(PETSC_ERR_MEM); 19 } 20 else 21 { 22 PetscFunctionReturn(0); 23 } 24 } 25 26 27 /* 28 spbas_cholesky_garbage_collect: 29 move the rows which have been calculated so far, as well as 30 those currently under construction, to the front of the 31 array, while putting them in the proper order. 32 When it seems necessary, enlarge the current arrays. 33 34 NB: re-allocation is being simulated using 35 PetscMalloc, memcpy, PetscFree, because 36 PetscRealloc does not seem to exist. 37 38 */ 39 #undef __FUNCT__ 40 #define __FUNCT__ "spbas_cholesky_garbage_collect" 41 PetscErrorCode spbas_cholesky_garbage_collect( 42 spbas_matrix * result, /* I/O: the Cholesky factor matrix being constructed. 43 Only the storage, not the contents of this matrix 44 is changed in this function */ 45 PetscInt i_row, /* I : Number of rows for which the final contents are 46 known */ 47 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 48 places in the arrays: they need not be moved any more */ 49 PetscInt *n_alloc_used, /* I/O: */ 50 PetscInt *max_row_nnz /*I : Over-estimate of the number of nonzeros needed to 51 store each row */ 52 ) 53 { 54 55 56 /* PSEUDO-CODE: 57 1. Choose the appropriate size for the arrays 58 2. Rescue the arrays which would be lost during garbage collection 59 3. Reallocate and correct administration 60 4. Move all arrays so that they are in proper order */ 61 62 PetscInt i,j; 63 PetscInt nrows = result->nrows; 64 PetscInt n_alloc_ok=0; 65 PetscInt n_alloc_ok_max=0; 66 67 PetscErrorCode ierr; 68 PetscInt need_already= 0; 69 PetscInt n_rows_ahead=0; 70 PetscInt max_need_extra= 0; 71 PetscInt n_alloc_max, n_alloc_est, n_alloc; 72 PetscInt n_alloc_now = result->n_alloc_icol; 73 PetscInt *alloc_icol_old = result->alloc_icol; 74 PetscScalar *alloc_val_old = result->alloc_val; 75 PetscInt *icol_rescue; 76 PetscScalar *val_rescue; 77 PetscInt n_rescue; 78 PetscInt n_row_rescue; 79 PetscInt i_here, i_last, n_copy; 80 const PetscReal xtra_perc = 20; 81 82 PetscFunctionBegin; 83 84 /********************************************************* 85 1. Choose appropriate array size 86 Count number of rows and memory usage which is already final */ 87 for (i=0;i<i_row; i++) 88 { 89 n_alloc_ok += result->row_nnz[i]; 90 n_alloc_ok_max += max_row_nnz[i]; 91 } 92 93 /* Count currently needed memory usage and future memory requirements 94 (max, predicted)*/ 95 for (i=i_row; i<nrows; i++) 96 { 97 if (result->row_nnz[i]==0) 98 { 99 max_need_extra += max_row_nnz[i]; 100 } 101 else 102 { 103 need_already += max_row_nnz[i]; 104 n_rows_ahead++; 105 } 106 } 107 108 /* Make maximal and realistic memory requirement estimates */ 109 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 110 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * 111 ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 112 113 /* Choose array sizes */ 114 if (n_alloc_max == n_alloc_est) 115 { n_alloc = n_alloc_max; } 116 else if (n_alloc_now >= n_alloc_est) 117 { n_alloc = n_alloc_now; } 118 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) 119 { n_alloc = n_alloc_max; } 120 else 121 { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); } 122 123 /* If new estimate is less than what we already have, 124 don't reallocate, just garbage-collect */ 125 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) 126 { 127 n_alloc = result->n_alloc_icol; 128 } 129 130 /* Motivate dimension choice */ 131 ierr = PetscInfo1(PETSC_NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 132 if (n_alloc_max == n_alloc_est) 133 { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); } 134 else if (n_alloc_now >= n_alloc_est) 135 { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); } 136 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) 137 { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); } 138 else 139 { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); } 140 141 142 /********************************************************** 143 2. Rescue arrays which would be lost 144 Count how many rows and nonzeros will have to be rescued 145 when moving all arrays in place */ 146 n_row_rescue = 0; n_rescue = 0; 147 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 148 else 149 { 150 i = *n_row_alloc_ok - 1; 151 *n_alloc_used = (result->icols[i]-result->alloc_icol) + 152 result->row_nnz[i]; 153 } 154 155 for (i=*n_row_alloc_ok; i<nrows; i++) 156 { 157 i_here = result->icols[i]-result->alloc_icol; 158 i_last = i_here + result->row_nnz[i]; 159 if (result->row_nnz[i]>0) 160 { 161 if (*n_alloc_used > i_here || i_last > n_alloc) 162 { 163 n_rescue += result->row_nnz[i]; 164 n_row_rescue++; 165 } 166 167 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 168 else { *n_alloc_used += max_row_nnz[i];} 169 } 170 } 171 172 /* Allocate rescue arrays */ 173 ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue); 174 CHKERRQ(ierr); 175 ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue); 176 CHKERRQ(ierr); 177 178 /* Rescue the arrays which need rescuing */ 179 n_row_rescue = 0; n_rescue = 0; 180 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 181 else 182 { 183 i = *n_row_alloc_ok - 1; 184 *n_alloc_used = (result->icols[i]-result->alloc_icol) + 185 result->row_nnz[i]; 186 } 187 188 for (i=*n_row_alloc_ok; i<nrows; i++) 189 { 190 i_here = result->icols[i]-result->alloc_icol; 191 i_last = i_here + result->row_nnz[i]; 192 if (result->row_nnz[i]>0) 193 { 194 if (*n_alloc_used > i_here || i_last > n_alloc) 195 { 196 memcpy((void*) &icol_rescue[n_rescue], 197 (void*) result->icols[i], 198 result->row_nnz[i] * sizeof(int)); 199 memcpy((void*) &val_rescue[n_rescue], 200 (void*) result->values[i], 201 result->row_nnz[i] * sizeof(PetscScalar)); 202 n_rescue += result->row_nnz[i]; 203 n_row_rescue++; 204 } 205 206 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 207 else { *n_alloc_used += max_row_nnz[i];} 208 } 209 } 210 211 /********************************************************** 212 3. Reallocate and correct administration */ 213 214 if (n_alloc != result->n_alloc_icol) 215 { 216 n_copy = MIN(n_alloc,result->n_alloc_icol); 217 218 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 219 220 Allocate new icol-data, copy old contents */ 221 ierr = PetscMalloc( n_alloc * sizeof(int), 222 &result->alloc_icol); CHKERRQ(ierr); 223 memcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int)); 224 225 /* Update administration, Reset pointers to new arrays */ 226 result->n_alloc_icol = n_alloc; 227 for (i=0; i<nrows; i++) 228 { 229 result->icols[i] = result->alloc_icol + 230 (result->icols[i] - alloc_icol_old); 231 result->values[i] = result->alloc_val + 232 (result->icols[i] - result->alloc_icol); 233 } 234 235 /* Delete old array */ 236 ierr = PetscFree(alloc_icol_old); CHKERRQ(ierr); 237 238 /* Allocate new value-data, copy old contents */ 239 ierr = PetscMalloc( n_alloc * sizeof(PetscScalar), 240 &result->alloc_val); CHKERRQ(ierr); 241 memcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar)); 242 243 /* Update administration, Reset pointers to new arrays */ 244 result->n_alloc_val = n_alloc; 245 for (i=0; i<nrows; i++) 246 { 247 result->values[i] = result->alloc_val + 248 (result->icols[i] - result->alloc_icol); 249 } 250 251 /* Delete old array */ 252 ierr = PetscFree(alloc_val_old); CHKERRQ(ierr); 253 } 254 255 256 /********************************************************* 257 4. Copy all the arrays to their proper places */ 258 n_row_rescue = 0; n_rescue = 0; 259 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 260 else 261 { 262 i = *n_row_alloc_ok - 1; 263 *n_alloc_used = (result->icols[i]-result->alloc_icol) + 264 result->row_nnz[i]; 265 } 266 267 for (i=*n_row_alloc_ok; i<nrows; i++) 268 { 269 i_here = result->icols[i]-result->alloc_icol; 270 i_last = i_here + result->row_nnz[i]; 271 272 result->icols[i] = result->alloc_icol + *n_alloc_used; 273 result->values[i]= result->alloc_val + *n_alloc_used; 274 275 if (result->row_nnz[i]>0) 276 { 277 if (*n_alloc_used > i_here || i_last > n_alloc) 278 { 279 memcpy((void*) result->icols[i], 280 (void*) &icol_rescue[n_rescue], 281 result->row_nnz[i] * sizeof(int)); 282 memcpy((void*) result->values[i], 283 (void*) &val_rescue[n_rescue], 284 result->row_nnz[i] * sizeof(PetscScalar)); 285 n_rescue += result->row_nnz[i]; 286 n_row_rescue++; 287 } 288 else 289 { 290 for (j=0; j<result->row_nnz[i]; j++) 291 { 292 result->icols[i][j] = result->alloc_icol[i_here+j]; 293 result->values[i][j] = result->alloc_val[ i_here+j]; 294 } 295 } 296 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 297 else { *n_alloc_used += max_row_nnz[i];} 298 } 299 } 300 301 /* Delete the rescue arrays */ 302 ierr = PetscFree(icol_rescue); CHKERRQ(ierr); 303 ierr = PetscFree(val_rescue); CHKERRQ(ierr); 304 *n_row_alloc_ok = i_row; 305 306 PetscFunctionReturn(0); 307 308 } 309 310 /* 311 spbas_incomplete_cholesky: 312 incomplete Cholesky decomposition of a square, symmetric, 313 positive definite matrix. 314 315 In case negative diagonals are encountered, function returns 316 NEGATIVE_DIAGONAL. When this happens, call this function again 317 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 318 droptol 319 */ 320 #undef __FUNCT__ 321 #define __FUNCT__ "spbas_incomplete_cholesky" 322 PetscErrorCode spbas_incomplete_cholesky( 323 Mat A, const PetscInt *rip, const PetscInt *riip, 324 spbas_matrix pattern, 325 PetscReal droptol, PetscReal epsdiag_in, 326 spbas_matrix * matrix_L) 327 { 328 PetscInt jL; 329 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 330 PetscInt *ai=a->i,*aj=a->j; 331 MatScalar *aa=a->a; 332 PetscInt nrows, ncols; 333 PetscInt *max_row_nnz; 334 PetscErrorCode ierr; 335 spbas_matrix retval; 336 PetscScalar * diag; 337 PetscScalar * val; 338 PetscScalar * lvec; 339 PetscScalar epsdiag; 340 PetscInt i,j,k; 341 const PetscTruth do_values = PETSC_TRUE; 342 PetscInt * r1_icol; PetscScalar *r1_val; 343 PetscInt * r_icol; PetscInt r_nnz; PetscScalar *r_val; 344 PetscInt * A_icol; PetscInt A_nnz; PetscScalar *A_val; 345 PetscInt * p_icol; PetscInt p_nnz; 346 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored 347 correctly in the matrix */ 348 PetscInt n_alloc_used = 0; /* part of result->icols and result->values 349 which is currently being used */ 350 PetscFunctionBegin; 351 352 /* Convert the Manteuffel shift from 'fraction of average diagonal' to 353 dimensioned value */ 354 ierr = MatGetSize(A, &nrows, &ncols); CHKERRQ(ierr); 355 ierr = MatGetTrace(A, &epsdiag); CHKERRQ(ierr); 356 epsdiag *= epsdiag_in / nrows; 357 ierr = PetscInfo2(PETSC_NULL," Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr); 358 359 if (droptol<1e-10) {droptol=1e-10;} 360 361 if ( (nrows != pattern.nrows) || 362 (ncols != pattern.ncols) || 363 (ncols != nrows) ) 364 { 365 SETERRQ( PETSC_ERR_ARG_INCOMP, 366 "Dimension error in spbas_incomplete_cholesky\n"); 367 } 368 369 retval.nrows = nrows; 370 retval.ncols = nrows; 371 retval.nnz = pattern.nnz/10; 372 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 373 retval.block_data = PETSC_TRUE; 374 375 ierr = spbas_allocate_pattern(&retval, do_values); CHKERRQ(ierr); 376 memset((void*) retval.row_nnz, 0, nrows*sizeof(int)); 377 ierr = spbas_allocate_data(&retval); CHKERRQ(ierr); 378 retval.nnz = 0; 379 380 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag); CHKERRQ(ierr); 381 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val); CHKERRQ(ierr); 382 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec); CHKERRQ(ierr); 383 ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz); CHKERRQ(ierr); 384 if (!(diag && val && lvec && max_row_nnz)) 385 { 386 SETERRQ( PETSC_ERR_MEM, 387 "Allocation error in spbas_incomplete_cholesky\n"); 388 } 389 390 memset((void*) val, 0, nrows*sizeof(PetscScalar)); 391 memset((void*) lvec, 0, nrows*sizeof(PetscScalar)); 392 memset((void*) max_row_nnz, 0, nrows*sizeof(int)); 393 394 /* Check correct format of sparseness pattern */ 395 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) 396 { 397 SETERRQ( PETSC_ERR_SUP_SYS, 398 "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 399 } 400 401 /* Count the nonzeros on transpose of pattern */ 402 for (i = 0; i<nrows; i++) 403 { 404 p_nnz = pattern.row_nnz[i]; 405 p_icol = pattern.icols[i]; 406 for (j=0; j<p_nnz; j++) 407 { 408 max_row_nnz[i+p_icol[j]]++; 409 } 410 } 411 412 /* Calculate rows of L */ 413 for (i = 0; i<nrows; i++) 414 { 415 p_nnz = pattern.row_nnz[i]; 416 p_icol = pattern.icols[i]; 417 418 r_nnz = retval.row_nnz[i]; 419 r_icol = retval.icols[i]; 420 r_val = retval.values[i]; 421 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 422 A_icol = &aj[ai[rip[i]]]; 423 A_val = &aa[ai[rip[i]]]; 424 425 /* Calculate val += A(i,i:n)'; */ 426 for (j=0; j<A_nnz; j++) 427 { 428 k = riip[A_icol[j]]; 429 if (k>=i) { val[k] = A_val[j]; } 430 } 431 432 /* Add regularization */ 433 val[i] += epsdiag; 434 435 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 436 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 437 for ( j=0; j<r_nnz; j++) 438 { 439 k = r_icol[j]; 440 lvec[k] = diag[k] * r_val[j]; 441 val[i] -= r_val[j] * lvec[k]; 442 } 443 444 /* Calculate the new diagonal */ 445 diag[i] = val[i]; 446 if (PetscRealPart(diag[i])<droptol) 447 { 448 ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n"); 449 ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1); 450 451 /* Delete the whole matrix at once. */ 452 ierr = spbas_delete(retval); 453 return NEGATIVE_DIAGONAL; 454 } 455 456 /* If necessary, allocate arrays */ 457 if (r_nnz==0) 458 { 459 ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 460 if (ierr == PETSC_ERR_MEM) 461 { 462 ierr = spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, 463 &n_alloc_used, max_row_nnz); 464 CHKERRQ(ierr); 465 ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 466 } 467 CHKERRQ(ierr); 468 r_icol = retval.icols[i]; 469 r_val = retval.values[i]; 470 } 471 472 /* Now, fill in */ 473 r_icol[r_nnz] = i; 474 r_val [r_nnz] = 1.0; 475 r_nnz++; 476 retval.row_nnz[i]++; 477 478 retval.nnz += r_nnz; 479 480 /* Calculate 481 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 482 for (j=1; j<p_nnz; j++) 483 { 484 k = i+p_icol[j]; 485 r1_icol = retval.icols[k]; 486 r1_val = retval.values[k]; 487 for (jL=0; jL<retval.row_nnz[k]; jL++) 488 { 489 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 490 } 491 val[k] /= diag[i]; 492 493 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) 494 { 495 /* If necessary, allocate arrays */ 496 if (retval.row_nnz[k]==0) 497 { 498 ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], 499 &n_alloc_used); 500 if (ierr == PETSC_ERR_MEM) 501 { 502 ierr = spbas_cholesky_garbage_collect( 503 &retval, i, &n_row_alloc_ok, 504 &n_alloc_used, max_row_nnz); 505 CHKERRQ(ierr); 506 ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], 507 &n_alloc_used); 508 r_icol = retval.icols[i]; 509 r_val = retval.values[i]; 510 } 511 CHKERRQ(ierr); 512 } 513 514 retval.icols[k][retval.row_nnz[k]] = i; 515 retval.values[k][retval.row_nnz[k]] = val[k]; 516 retval.row_nnz[k]++; 517 } 518 val[k] = 0; 519 } 520 521 /*Erase the values used in the work arrays */ 522 for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; } 523 } 524 525 ierr=PetscFree(lvec); CHKERRQ(ierr); ierr=PetscFree(val); CHKERRQ(ierr); 526 527 ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, 528 &n_alloc_used, max_row_nnz); 529 ierr=PetscFree(max_row_nnz); CHKERRQ(ierr); 530 531 /* Place the inverse of the diagonals in the matrix */ 532 for (i=0; i<nrows; i++) 533 { 534 r_nnz = retval.row_nnz[i]; 535 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 536 for (j=0; j<r_nnz-1; j++) 537 { 538 retval.values[i][j] *= -1; 539 } 540 } 541 ierr=PetscFree(diag); CHKERRQ(ierr); 542 543 544 *matrix_L = retval; 545 546 PetscFunctionReturn(0); 547 } 548 549