1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "spbas_cholesky_row_alloc" 10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11 { 12 PetscFunctionBegin; 13 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15 *n_alloc_used += r_nnz; 16 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 17 PetscFunctionReturn(0); 18 } 19 20 21 /* 22 spbas_cholesky_garbage_collect: 23 move the rows which have been calculated so far, as well as 24 those currently under construction, to the front of the 25 array, while putting them in the proper order. 26 When it seems necessary, enlarge the current arrays. 27 28 NB: re-allocation is being simulated using 29 PetscMalloc, memcpy, PetscFree, because 30 PetscRealloc does not seem to exist. 31 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "spbas_cholesky_garbage_collect" 35 PetscErrorCode spbas_cholesky_garbage_collect( 36 spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 37 Only the storage, not the contents of this matrix 38 is changed in this function */ 39 PetscInt i_row, /* I : Number of rows for which the final contents are 40 known */ 41 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 42 places in the arrays: they need not be moved any more */ 43 PetscInt *n_alloc_used, /* I/O: */ 44 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to 45 store each row */ 46 { 47 48 49 /* PSEUDO-CODE: 50 1. Choose the appropriate size for the arrays 51 2. Rescue the arrays which would be lost during garbage collection 52 3. Reallocate and correct administration 53 4. Move all arrays so that they are in proper order */ 54 55 PetscInt i,j; 56 PetscInt nrows = result->nrows; 57 PetscInt n_alloc_ok=0; 58 PetscInt n_alloc_ok_max=0; 59 PetscErrorCode ierr; 60 PetscInt need_already= 0; 61 PetscInt n_rows_ahead=0; 62 PetscInt max_need_extra= 0; 63 PetscInt n_alloc_max, n_alloc_est, n_alloc; 64 PetscInt n_alloc_now = result->n_alloc_icol; 65 PetscInt *alloc_icol_old = result->alloc_icol; 66 PetscScalar *alloc_val_old = result->alloc_val; 67 PetscInt *icol_rescue; 68 PetscScalar *val_rescue; 69 PetscInt n_rescue; 70 PetscInt n_row_rescue; 71 PetscInt i_here, i_last, n_copy; 72 const PetscReal xtra_perc = 20; 73 74 PetscFunctionBegin; 75 /********************************************************* 76 1. Choose appropriate array size 77 Count number of rows and memory usage which is already final */ 78 for (i=0;i<i_row; i++) { 79 n_alloc_ok += result->row_nnz[i]; 80 n_alloc_ok_max += max_row_nnz[i]; 81 } 82 83 /* Count currently needed memory usage and future memory requirements 84 (max, predicted)*/ 85 for (i=i_row; i<nrows; i++) { 86 if (!result->row_nnz[i]) { 87 max_need_extra += max_row_nnz[i]; 88 } else { 89 need_already += max_row_nnz[i]; 90 n_rows_ahead++; 91 } 92 } 93 94 /* Make maximal and realistic memory requirement estimates */ 95 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 96 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 97 98 /* Choose array sizes */ 99 if (n_alloc_max == n_alloc_est) { n_alloc = n_alloc_max; } 100 else if (n_alloc_now >= n_alloc_est) { n_alloc = n_alloc_now; } 101 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { n_alloc = n_alloc_max; } 102 else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); } 103 104 /* If new estimate is less than what we already have, 105 don't reallocate, just garbage-collect */ 106 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 107 n_alloc = result->n_alloc_icol; 108 } 109 110 /* Motivate dimension choice */ 111 ierr = PetscInfo1(PETSC_NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 112 if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); } 113 else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); } 114 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); } 115 else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); } 116 117 118 /********************************************************** 119 2. Rescue arrays which would be lost 120 Count how many rows and nonzeros will have to be rescued 121 when moving all arrays in place */ 122 n_row_rescue = 0; n_rescue = 0; 123 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 124 else { 125 i = *n_row_alloc_ok - 1; 126 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 127 } 128 129 for (i=*n_row_alloc_ok; i<nrows; i++) { 130 i_here = result->icols[i]-result->alloc_icol; 131 i_last = i_here + result->row_nnz[i]; 132 if (result->row_nnz[i]>0) { 133 if (*n_alloc_used > i_here || i_last > n_alloc) { 134 n_rescue += result->row_nnz[i]; 135 n_row_rescue++; 136 } 137 138 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 139 else { *n_alloc_used += max_row_nnz[i];} 140 } 141 } 142 143 /* Allocate rescue arrays */ 144 ierr = PetscMalloc(n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr); 145 ierr = PetscMalloc(n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr); 146 147 /* Rescue the arrays which need rescuing */ 148 n_row_rescue = 0; n_rescue = 0; 149 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 150 else { 151 i = *n_row_alloc_ok - 1; 152 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 153 } 154 155 for (i=*n_row_alloc_ok; i<nrows; i++) { 156 i_here = result->icols[i]-result->alloc_icol; 157 i_last = i_here + result->row_nnz[i]; 158 if (result->row_nnz[i]>0) { 159 if (*n_alloc_used > i_here || i_last > n_alloc) { 160 ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 161 ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 162 n_rescue += result->row_nnz[i]; 163 n_row_rescue++; 164 } 165 166 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 167 else { *n_alloc_used += max_row_nnz[i];} 168 } 169 } 170 171 /********************************************************** 172 3. Reallocate and correct administration */ 173 174 if (n_alloc != result->n_alloc_icol) { 175 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 176 177 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 178 179 Allocate new icol-data, copy old contents */ 180 ierr = PetscMalloc(n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr); 181 ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr); 182 183 /* Update administration, Reset pointers to new arrays */ 184 result->n_alloc_icol = n_alloc; 185 for (i=0; i<nrows; i++) { 186 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 187 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 188 } 189 190 /* Delete old array */ 191 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 192 193 /* Allocate new value-data, copy old contents */ 194 ierr = PetscMalloc(n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr); 195 ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr); 196 197 /* Update administration, Reset pointers to new arrays */ 198 result->n_alloc_val = n_alloc; 199 for (i=0; i<nrows; i++) { 200 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 201 } 202 203 /* Delete old array */ 204 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 205 } 206 207 208 /********************************************************* 209 4. Copy all the arrays to their proper places */ 210 n_row_rescue = 0; n_rescue = 0; 211 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 212 else { 213 i = *n_row_alloc_ok - 1; 214 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 215 } 216 217 for (i=*n_row_alloc_ok; i<nrows; i++) { 218 i_here = result->icols[i]-result->alloc_icol; 219 i_last = i_here + result->row_nnz[i]; 220 221 result->icols[i] = result->alloc_icol + *n_alloc_used; 222 result->values[i]= result->alloc_val + *n_alloc_used; 223 224 if (result->row_nnz[i]>0) { 225 if (*n_alloc_used > i_here || i_last > n_alloc) { 226 ierr = PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 227 ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 228 n_rescue += result->row_nnz[i]; 229 n_row_rescue++; 230 } else { 231 for (j=0; j<result->row_nnz[i]; j++) { 232 result->icols[i][j] = result->alloc_icol[i_here+j]; 233 result->values[i][j] = result->alloc_val[ i_here+j]; 234 } 235 } 236 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 237 else { *n_alloc_used += max_row_nnz[i];} 238 } 239 } 240 241 /* Delete the rescue arrays */ 242 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 243 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 244 *n_row_alloc_ok = i_row; 245 PetscFunctionReturn(0); 246 } 247 248 /* 249 spbas_incomplete_cholesky: 250 incomplete Cholesky decomposition of a square, symmetric, 251 positive definite matrix. 252 253 In case negative diagonals are encountered, function returns 254 NEGATIVE_DIAGONAL. When this happens, call this function again 255 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 256 droptol 257 */ 258 #undef __FUNCT__ 259 #define __FUNCT__ "spbas_incomplete_cholesky" 260 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 261 { 262 PetscInt jL; 263 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 264 PetscInt *ai=a->i,*aj=a->j; 265 MatScalar *aa=a->a; 266 PetscInt nrows, ncols; 267 PetscInt *max_row_nnz; 268 PetscErrorCode ierr; 269 spbas_matrix retval; 270 PetscScalar * diag; 271 PetscScalar * val; 272 PetscScalar * lvec; 273 PetscScalar epsdiag; 274 PetscInt i,j,k; 275 const PetscBool do_values = PETSC_TRUE; 276 PetscInt * r1_icol; 277 PetscScalar *r1_val; 278 PetscInt * r_icol; 279 PetscInt r_nnz; 280 PetscScalar *r_val; 281 PetscInt * A_icol; 282 PetscInt A_nnz; 283 PetscScalar *A_val; 284 PetscInt * p_icol; 285 PetscInt p_nnz; 286 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 287 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 288 289 PetscFunctionBegin; 290 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 291 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 292 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 293 epsdiag *= epsdiag_in / nrows; 294 ierr = PetscInfo2(PETSC_NULL," Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr); 295 296 if (droptol<1e-10) {droptol=1e-10;} 297 298 if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n"); 299 300 retval.nrows = nrows; 301 retval.ncols = nrows; 302 retval.nnz = pattern.nnz/10; 303 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 304 retval.block_data = PETSC_TRUE; 305 306 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 307 ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 308 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 309 retval.nnz = 0; 310 311 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr); 312 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr); 313 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr); 314 ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr); 315 if (!(diag && val && lvec && max_row_nnz)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n"); 316 317 ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 318 ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 319 ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 320 321 /* Check correct format of sparseness pattern */ 322 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 323 324 /* Count the nonzeros on transpose of pattern */ 325 for (i = 0; i<nrows; i++) { 326 p_nnz = pattern.row_nnz[i]; 327 p_icol = pattern.icols[i]; 328 for (j=0; j<p_nnz; j++) { 329 max_row_nnz[i+p_icol[j]]++; 330 } 331 } 332 333 /* Calculate rows of L */ 334 for (i = 0; i<nrows; i++) { 335 p_nnz = pattern.row_nnz[i]; 336 p_icol = pattern.icols[i]; 337 338 r_nnz = retval.row_nnz[i]; 339 r_icol = retval.icols[i]; 340 r_val = retval.values[i]; 341 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 342 A_icol = &aj[ai[rip[i]]]; 343 A_val = &aa[ai[rip[i]]]; 344 345 /* Calculate val += A(i,i:n)'; */ 346 for (j=0; j<A_nnz; j++) { 347 k = riip[A_icol[j]]; 348 if (k>=i) { val[k] = A_val[j]; } 349 } 350 351 /* Add regularization */ 352 val[i] += epsdiag; 353 354 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 355 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 356 for (j=0; j<r_nnz; j++) { 357 k = r_icol[j]; 358 lvec[k] = diag[k] * r_val[j]; 359 val[i] -= r_val[j] * lvec[k]; 360 } 361 362 /* Calculate the new diagonal */ 363 diag[i] = val[i]; 364 if (PetscRealPart(diag[i])<droptol) { 365 ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n"); 366 ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1); 367 368 /* Delete the whole matrix at once. */ 369 ierr = spbas_delete(retval); 370 return NEGATIVE_DIAGONAL; 371 } 372 373 /* If necessary, allocate arrays */ 374 if (r_nnz==0) { 375 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 376 if (ierr == PETSC_ERR_MEM) { 377 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 378 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 379 } 380 r_icol = retval.icols[i]; 381 r_val = retval.values[i]; 382 } 383 384 /* Now, fill in */ 385 r_icol[r_nnz] = i; 386 r_val [r_nnz] = 1.0; 387 r_nnz++; 388 retval.row_nnz[i]++; 389 390 retval.nnz += r_nnz; 391 392 /* Calculate 393 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 394 for (j=1; j<p_nnz; j++) { 395 k = i+p_icol[j]; 396 r1_icol = retval.icols[k]; 397 r1_val = retval.values[k]; 398 for (jL=0; jL<retval.row_nnz[k]; jL++) { 399 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 400 } 401 val[k] /= diag[i]; 402 403 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 404 /* If necessary, allocate arrays */ 405 if (retval.row_nnz[k]==0) { 406 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 407 if (ierr == PETSC_ERR_MEM) { 408 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 409 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 410 r_icol = retval.icols[i]; 411 r_val = retval.values[i]; 412 } 413 } 414 415 retval.icols[k][retval.row_nnz[k]] = i; 416 retval.values[k][retval.row_nnz[k]] = val[k]; 417 retval.row_nnz[k]++; 418 } 419 val[k] = 0; 420 } 421 422 /* Erase the values used in the work arrays */ 423 for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; } 424 } 425 426 ierr=PetscFree(lvec);CHKERRQ(ierr); 427 ierr=PetscFree(val);CHKERRQ(ierr); 428 429 ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 430 ierr=PetscFree(max_row_nnz);CHKERRQ(ierr); 431 432 /* Place the inverse of the diagonals in the matrix */ 433 for (i=0; i<nrows; i++) { 434 r_nnz = retval.row_nnz[i]; 435 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 436 for (j=0; j<r_nnz-1; j++) { 437 retval.values[i][j] *= -1; 438 } 439 } 440 ierr=PetscFree(diag);CHKERRQ(ierr); 441 *matrix_L = retval; 442 PetscFunctionReturn(0); 443 } 444 445