1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "spbas_cholesky_row_alloc" 10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11 { 12 PetscFunctionBegin; 13 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15 *n_alloc_used += r_nnz; 16 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 17 PetscFunctionReturn(0); 18 } 19 20 21 /* 22 spbas_cholesky_garbage_collect: 23 move the rows which have been calculated so far, as well as 24 those currently under construction, to the front of the 25 array, while putting them in the proper order. 26 When it seems necessary, enlarge the current arrays. 27 28 NB: re-allocation is being simulated using 29 PetscMalloc, memcpy, PetscFree, because 30 PetscRealloc does not seem to exist. 31 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "spbas_cholesky_garbage_collect" 35 PetscErrorCode spbas_cholesky_garbage_collect( 36 spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 37 Only the storage, not the contents of this matrix 38 is changed in this function */ 39 PetscInt i_row, /* I : Number of rows for which the final contents are 40 known */ 41 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 42 places in the arrays: they need not be moved any more */ 43 PetscInt *n_alloc_used, /* I/O: */ 44 PetscInt *max_row_nnz /*I : Over-estimate of the number of nonzeros needed to 45 store each row */ 46 ) 47 { 48 49 50 /* PSEUDO-CODE: 51 1. Choose the appropriate size for the arrays 52 2. Rescue the arrays which would be lost during garbage collection 53 3. Reallocate and correct administration 54 4. Move all arrays so that they are in proper order */ 55 56 PetscInt i,j; 57 PetscInt nrows = result->nrows; 58 PetscInt n_alloc_ok=0; 59 PetscInt n_alloc_ok_max=0; 60 PetscErrorCode ierr; 61 PetscInt need_already= 0; 62 PetscInt n_rows_ahead=0; 63 PetscInt max_need_extra= 0; 64 PetscInt n_alloc_max, n_alloc_est, n_alloc; 65 PetscInt n_alloc_now = result->n_alloc_icol; 66 PetscInt *alloc_icol_old = result->alloc_icol; 67 PetscScalar *alloc_val_old = result->alloc_val; 68 PetscInt *icol_rescue; 69 PetscScalar *val_rescue; 70 PetscInt n_rescue; 71 PetscInt n_row_rescue; 72 PetscInt i_here, i_last, n_copy; 73 const PetscReal xtra_perc = 20; 74 75 PetscFunctionBegin; 76 77 /********************************************************* 78 1. Choose appropriate array size 79 Count number of rows and memory usage which is already final */ 80 for (i=0;i<i_row; i++) { 81 n_alloc_ok += result->row_nnz[i]; 82 n_alloc_ok_max += max_row_nnz[i]; 83 } 84 85 /* Count currently needed memory usage and future memory requirements 86 (max, predicted)*/ 87 for (i=i_row; i<nrows; i++) { 88 if (!result->row_nnz[i]) { 89 max_need_extra += max_row_nnz[i]; 90 } else { 91 need_already += max_row_nnz[i]; 92 n_rows_ahead++; 93 } 94 } 95 96 /* Make maximal and realistic memory requirement estimates */ 97 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 98 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 99 100 /* Choose array sizes */ 101 if (n_alloc_max == n_alloc_est) { n_alloc = n_alloc_max; } 102 else if (n_alloc_now >= n_alloc_est) { n_alloc = n_alloc_now; } 103 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { n_alloc = n_alloc_max; } 104 else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); } 105 106 /* If new estimate is less than what we already have, 107 don't reallocate, just garbage-collect */ 108 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 109 n_alloc = result->n_alloc_icol; 110 } 111 112 /* Motivate dimension choice */ 113 ierr = PetscInfo1(PETSC_NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 114 if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); } 115 else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); } 116 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); } 117 else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); } 118 119 120 /********************************************************** 121 2. Rescue arrays which would be lost 122 Count how many rows and nonzeros will have to be rescued 123 when moving all arrays in place */ 124 n_row_rescue = 0; n_rescue = 0; 125 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 126 else { 127 i = *n_row_alloc_ok - 1; 128 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 129 } 130 131 for (i=*n_row_alloc_ok; i<nrows; i++) { 132 i_here = result->icols[i]-result->alloc_icol; 133 i_last = i_here + result->row_nnz[i]; 134 if (result->row_nnz[i]>0) { 135 if (*n_alloc_used > i_here || i_last > n_alloc) { 136 n_rescue += result->row_nnz[i]; 137 n_row_rescue++; 138 } 139 140 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 141 else { *n_alloc_used += max_row_nnz[i];} 142 } 143 } 144 145 /* Allocate rescue arrays */ 146 ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr); 147 ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr); 148 149 /* Rescue the arrays which need rescuing */ 150 n_row_rescue = 0; n_rescue = 0; 151 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 152 else { 153 i = *n_row_alloc_ok - 1; 154 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 155 } 156 157 for (i=*n_row_alloc_ok; i<nrows; i++) { 158 i_here = result->icols[i]-result->alloc_icol; 159 i_last = i_here + result->row_nnz[i]; 160 if (result->row_nnz[i]>0) { 161 if (*n_alloc_used > i_here || i_last > n_alloc) 162 { 163 ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 164 ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 165 n_rescue += result->row_nnz[i]; 166 n_row_rescue++; 167 } 168 169 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 170 else { *n_alloc_used += max_row_nnz[i];} 171 } 172 } 173 174 /********************************************************** 175 3. Reallocate and correct administration */ 176 177 if (n_alloc != result->n_alloc_icol) { 178 n_copy = MIN(n_alloc,result->n_alloc_icol); 179 180 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 181 182 Allocate new icol-data, copy old contents */ 183 ierr = PetscMalloc( n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr); 184 ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr); 185 186 /* Update administration, Reset pointers to new arrays */ 187 result->n_alloc_icol = n_alloc; 188 for (i=0; i<nrows; i++) { 189 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 190 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 191 } 192 193 /* Delete old array */ 194 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 195 196 /* Allocate new value-data, copy old contents */ 197 ierr = PetscMalloc( n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr); 198 ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr); 199 200 /* Update administration, Reset pointers to new arrays */ 201 result->n_alloc_val = n_alloc; 202 for (i=0; i<nrows; i++) { 203 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 204 } 205 206 /* Delete old array */ 207 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 208 } 209 210 211 /********************************************************* 212 4. Copy all the arrays to their proper places */ 213 n_row_rescue = 0; n_rescue = 0; 214 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 215 else { 216 i = *n_row_alloc_ok - 1; 217 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 218 } 219 220 for (i=*n_row_alloc_ok; i<nrows; i++) { 221 i_here = result->icols[i]-result->alloc_icol; 222 i_last = i_here + result->row_nnz[i]; 223 224 result->icols[i] = result->alloc_icol + *n_alloc_used; 225 result->values[i]= result->alloc_val + *n_alloc_used; 226 227 if (result->row_nnz[i]>0) { 228 if (*n_alloc_used > i_here || i_last > n_alloc) { 229 ierr = PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 230 ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 231 n_rescue += result->row_nnz[i]; 232 n_row_rescue++; 233 } else { 234 for (j=0; j<result->row_nnz[i]; j++) { 235 result->icols[i][j] = result->alloc_icol[i_here+j]; 236 result->values[i][j] = result->alloc_val[ i_here+j]; 237 } 238 } 239 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 240 else { *n_alloc_used += max_row_nnz[i];} 241 } 242 } 243 244 /* Delete the rescue arrays */ 245 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 246 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 247 *n_row_alloc_ok = i_row; 248 PetscFunctionReturn(0); 249 } 250 251 /* 252 spbas_incomplete_cholesky: 253 incomplete Cholesky decomposition of a square, symmetric, 254 positive definite matrix. 255 256 In case negative diagonals are encountered, function returns 257 NEGATIVE_DIAGONAL. When this happens, call this function again 258 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 259 droptol 260 */ 261 #undef __FUNCT__ 262 #define __FUNCT__ "spbas_incomplete_cholesky" 263 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 264 { 265 PetscInt jL; 266 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 267 PetscInt *ai=a->i,*aj=a->j; 268 MatScalar *aa=a->a; 269 PetscInt nrows, ncols; 270 PetscInt *max_row_nnz; 271 PetscErrorCode ierr; 272 spbas_matrix retval; 273 PetscScalar * diag; 274 PetscScalar * val; 275 PetscScalar * lvec; 276 PetscScalar epsdiag; 277 PetscInt i,j,k; 278 const PetscTruth do_values = PETSC_TRUE; 279 PetscInt * r1_icol; 280 PetscScalar *r1_val; 281 PetscInt * r_icol; 282 PetscInt r_nnz; 283 PetscScalar *r_val; 284 PetscInt * A_icol; 285 PetscInt A_nnz; 286 PetscScalar *A_val; 287 PetscInt * p_icol; 288 PetscInt p_nnz; 289 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 290 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 291 292 PetscFunctionBegin; 293 /* Convert the Manteuffel shift from 'fraction of average diagonal' to 294 dimensioned value */ 295 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 296 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 297 epsdiag *= epsdiag_in / nrows; 298 ierr = PetscInfo2(PETSC_NULL," Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr); 299 300 if (droptol<1e-10) {droptol=1e-10;} 301 302 if ( (nrows != pattern.nrows) || 303 (ncols != pattern.ncols) || 304 (ncols != nrows) ) 305 { 306 SETERRQ( PETSC_ERR_ARG_INCOMP, 307 "Dimension error in spbas_incomplete_cholesky\n"); 308 } 309 310 retval.nrows = nrows; 311 retval.ncols = nrows; 312 retval.nnz = pattern.nnz/10; 313 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 314 retval.block_data = PETSC_TRUE; 315 316 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 317 memset((void*) retval.row_nnz, 0, nrows*sizeof(int)); 318 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 319 retval.nnz = 0; 320 321 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr); 322 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr); 323 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr); 324 ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr); 325 if (!(diag && val && lvec && max_row_nnz)) 326 { 327 SETERRQ( PETSC_ERR_MEM, 328 "Allocation error in spbas_incomplete_cholesky\n"); 329 } 330 331 memset((void*) val, 0, nrows*sizeof(PetscScalar)); 332 memset((void*) lvec, 0, nrows*sizeof(PetscScalar)); 333 memset((void*) max_row_nnz, 0, nrows*sizeof(int)); 334 335 /* Check correct format of sparseness pattern */ 336 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) 337 { 338 SETERRQ( PETSC_ERR_SUP_SYS, 339 "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 340 } 341 342 /* Count the nonzeros on transpose of pattern */ 343 for (i = 0; i<nrows; i++) 344 { 345 p_nnz = pattern.row_nnz[i]; 346 p_icol = pattern.icols[i]; 347 for (j=0; j<p_nnz; j++) 348 { 349 max_row_nnz[i+p_icol[j]]++; 350 } 351 } 352 353 /* Calculate rows of L */ 354 for (i = 0; i<nrows; i++) 355 { 356 p_nnz = pattern.row_nnz[i]; 357 p_icol = pattern.icols[i]; 358 359 r_nnz = retval.row_nnz[i]; 360 r_icol = retval.icols[i]; 361 r_val = retval.values[i]; 362 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 363 A_icol = &aj[ai[rip[i]]]; 364 A_val = &aa[ai[rip[i]]]; 365 366 /* Calculate val += A(i,i:n)'; */ 367 for (j=0; j<A_nnz; j++) 368 { 369 k = riip[A_icol[j]]; 370 if (k>=i) { val[k] = A_val[j]; } 371 } 372 373 /* Add regularization */ 374 val[i] += epsdiag; 375 376 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 377 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 378 for ( j=0; j<r_nnz; j++) 379 { 380 k = r_icol[j]; 381 lvec[k] = diag[k] * r_val[j]; 382 val[i] -= r_val[j] * lvec[k]; 383 } 384 385 /* Calculate the new diagonal */ 386 diag[i] = val[i]; 387 if (PetscRealPart(diag[i])<droptol) 388 { 389 ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n"); 390 ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1); 391 392 /* Delete the whole matrix at once. */ 393 ierr = spbas_delete(retval); 394 return NEGATIVE_DIAGONAL; 395 } 396 397 /* If necessary, allocate arrays */ 398 if (r_nnz==0) 399 { 400 ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 401 if (ierr == PETSC_ERR_MEM) 402 { 403 ierr = spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 404 ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 405 } 406 r_icol = retval.icols[i]; 407 r_val = retval.values[i]; 408 } 409 410 /* Now, fill in */ 411 r_icol[r_nnz] = i; 412 r_val [r_nnz] = 1.0; 413 r_nnz++; 414 retval.row_nnz[i]++; 415 416 retval.nnz += r_nnz; 417 418 /* Calculate 419 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 420 for (j=1; j<p_nnz; j++) 421 { 422 k = i+p_icol[j]; 423 r1_icol = retval.icols[k]; 424 r1_val = retval.values[k]; 425 for (jL=0; jL<retval.row_nnz[k]; jL++) 426 { 427 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 428 } 429 val[k] /= diag[i]; 430 431 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) 432 { 433 /* If necessary, allocate arrays */ 434 if (retval.row_nnz[k]==0) 435 { 436 ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used); 437 if (ierr == PETSC_ERR_MEM) { 438 ierr = spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 439 ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 440 r_icol = retval.icols[i]; 441 r_val = retval.values[i]; 442 } 443 } 444 445 retval.icols[k][retval.row_nnz[k]] = i; 446 retval.values[k][retval.row_nnz[k]] = val[k]; 447 retval.row_nnz[k]++; 448 } 449 val[k] = 0; 450 } 451 452 /*Erase the values used in the work arrays */ 453 for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; } 454 } 455 456 ierr=PetscFree(lvec);CHKERRQ(ierr); 457 ierr=PetscFree(val);CHKERRQ(ierr); 458 459 ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 460 ierr=PetscFree(max_row_nnz);CHKERRQ(ierr); 461 462 /* Place the inverse of the diagonals in the matrix */ 463 for (i=0; i<nrows; i++) { 464 r_nnz = retval.row_nnz[i]; 465 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 466 for (j=0; j<r_nnz-1; j++) { 467 retval.values[i][j] *= -1; 468 } 469 } 470 ierr=PetscFree(diag);CHKERRQ(ierr); 471 *matrix_L = retval; 472 PetscFunctionReturn(0); 473 } 474 475