1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "spbas_cholesky_row_alloc" 10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11 { 12 PetscFunctionBegin; 13 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15 *n_alloc_used += r_nnz; 16 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 17 PetscFunctionReturn(0); 18 } 19 20 21 /* 22 spbas_cholesky_garbage_collect: 23 move the rows which have been calculated so far, as well as 24 those currently under construction, to the front of the 25 array, while putting them in the proper order. 26 When it seems necessary, enlarge the current arrays. 27 28 NB: re-allocation is being simulated using 29 PetscMalloc, memcpy, PetscFree, because 30 PetscRealloc does not seem to exist. 31 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "spbas_cholesky_garbage_collect" 35 PetscErrorCode spbas_cholesky_garbage_collect( 36 spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 37 Only the storage, not the contents of this matrix 38 is changed in this function */ 39 PetscInt i_row, /* I : Number of rows for which the final contents are 40 known */ 41 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 42 places in the arrays: they need not be moved any more */ 43 PetscInt *n_alloc_used, /* I/O: */ 44 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to 45 store each row */ 46 { 47 48 49 /* PSEUDO-CODE: 50 1. Choose the appropriate size for the arrays 51 2. Rescue the arrays which would be lost during garbage collection 52 3. Reallocate and correct administration 53 4. Move all arrays so that they are in proper order */ 54 55 PetscInt i,j; 56 PetscInt nrows = result->nrows; 57 PetscInt n_alloc_ok=0; 58 PetscInt n_alloc_ok_max=0; 59 PetscErrorCode ierr; 60 PetscInt need_already= 0; 61 PetscInt n_rows_ahead=0; 62 PetscInt max_need_extra= 0; 63 PetscInt n_alloc_max, n_alloc_est, n_alloc; 64 PetscInt n_alloc_now = result->n_alloc_icol; 65 PetscInt *alloc_icol_old = result->alloc_icol; 66 PetscScalar *alloc_val_old = result->alloc_val; 67 PetscInt *icol_rescue; 68 PetscScalar *val_rescue; 69 PetscInt n_rescue; 70 PetscInt n_row_rescue; 71 PetscInt i_here, i_last, n_copy; 72 const PetscReal xtra_perc = 20; 73 74 PetscFunctionBegin; 75 76 /********************************************************* 77 1. Choose appropriate array size 78 Count number of rows and memory usage which is already final */ 79 for (i=0;i<i_row; i++) { 80 n_alloc_ok += result->row_nnz[i]; 81 n_alloc_ok_max += max_row_nnz[i]; 82 } 83 84 /* Count currently needed memory usage and future memory requirements 85 (max, predicted)*/ 86 for (i=i_row; i<nrows; i++) { 87 if (!result->row_nnz[i]) { 88 max_need_extra += max_row_nnz[i]; 89 } else { 90 need_already += max_row_nnz[i]; 91 n_rows_ahead++; 92 } 93 } 94 95 /* Make maximal and realistic memory requirement estimates */ 96 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 97 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 98 99 /* Choose array sizes */ 100 if (n_alloc_max == n_alloc_est) { n_alloc = n_alloc_max; } 101 else if (n_alloc_now >= n_alloc_est) { n_alloc = n_alloc_now; } 102 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { n_alloc = n_alloc_max; } 103 else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); } 104 105 /* If new estimate is less than what we already have, 106 don't reallocate, just garbage-collect */ 107 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 108 n_alloc = result->n_alloc_icol; 109 } 110 111 /* Motivate dimension choice */ 112 ierr = PetscInfo1(PETSC_NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 113 if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); } 114 else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); } 115 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); } 116 else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); } 117 118 119 /********************************************************** 120 2. Rescue arrays which would be lost 121 Count how many rows and nonzeros will have to be rescued 122 when moving all arrays in place */ 123 n_row_rescue = 0; n_rescue = 0; 124 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 125 else { 126 i = *n_row_alloc_ok - 1; 127 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 128 } 129 130 for (i=*n_row_alloc_ok; i<nrows; i++) { 131 i_here = result->icols[i]-result->alloc_icol; 132 i_last = i_here + result->row_nnz[i]; 133 if (result->row_nnz[i]>0) { 134 if (*n_alloc_used > i_here || i_last > n_alloc) { 135 n_rescue += result->row_nnz[i]; 136 n_row_rescue++; 137 } 138 139 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 140 else { *n_alloc_used += max_row_nnz[i];} 141 } 142 } 143 144 /* Allocate rescue arrays */ 145 ierr = PetscMalloc(n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr); 146 ierr = PetscMalloc(n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr); 147 148 /* Rescue the arrays which need rescuing */ 149 n_row_rescue = 0; n_rescue = 0; 150 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 151 else { 152 i = *n_row_alloc_ok - 1; 153 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 154 } 155 156 for (i=*n_row_alloc_ok; i<nrows; i++) { 157 i_here = result->icols[i]-result->alloc_icol; 158 i_last = i_here + result->row_nnz[i]; 159 if (result->row_nnz[i]>0) { 160 if (*n_alloc_used > i_here || i_last > n_alloc) { 161 ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 162 ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 163 n_rescue += result->row_nnz[i]; 164 n_row_rescue++; 165 } 166 167 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 168 else { *n_alloc_used += max_row_nnz[i];} 169 } 170 } 171 172 /********************************************************** 173 3. Reallocate and correct administration */ 174 175 if (n_alloc != result->n_alloc_icol) { 176 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 177 178 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 179 180 Allocate new icol-data, copy old contents */ 181 ierr = PetscMalloc(n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr); 182 ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr); 183 184 /* Update administration, Reset pointers to new arrays */ 185 result->n_alloc_icol = n_alloc; 186 for (i=0; i<nrows; i++) { 187 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 188 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 189 } 190 191 /* Delete old array */ 192 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 193 194 /* Allocate new value-data, copy old contents */ 195 ierr = PetscMalloc(n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr); 196 ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr); 197 198 /* Update administration, Reset pointers to new arrays */ 199 result->n_alloc_val = n_alloc; 200 for (i=0; i<nrows; i++) { 201 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 202 } 203 204 /* Delete old array */ 205 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 206 } 207 208 209 /********************************************************* 210 4. Copy all the arrays to their proper places */ 211 n_row_rescue = 0; n_rescue = 0; 212 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 213 else { 214 i = *n_row_alloc_ok - 1; 215 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 216 } 217 218 for (i=*n_row_alloc_ok; i<nrows; i++) { 219 i_here = result->icols[i]-result->alloc_icol; 220 i_last = i_here + result->row_nnz[i]; 221 222 result->icols[i] = result->alloc_icol + *n_alloc_used; 223 result->values[i]= result->alloc_val + *n_alloc_used; 224 225 if (result->row_nnz[i]>0) { 226 if (*n_alloc_used > i_here || i_last > n_alloc) { 227 ierr = PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 228 ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 229 n_rescue += result->row_nnz[i]; 230 n_row_rescue++; 231 } else { 232 for (j=0; j<result->row_nnz[i]; j++) { 233 result->icols[i][j] = result->alloc_icol[i_here+j]; 234 result->values[i][j] = result->alloc_val[ i_here+j]; 235 } 236 } 237 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 238 else { *n_alloc_used += max_row_nnz[i];} 239 } 240 } 241 242 /* Delete the rescue arrays */ 243 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 244 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 245 *n_row_alloc_ok = i_row; 246 PetscFunctionReturn(0); 247 } 248 249 /* 250 spbas_incomplete_cholesky: 251 incomplete Cholesky decomposition of a square, symmetric, 252 positive definite matrix. 253 254 In case negative diagonals are encountered, function returns 255 NEGATIVE_DIAGONAL. When this happens, call this function again 256 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 257 droptol 258 */ 259 #undef __FUNCT__ 260 #define __FUNCT__ "spbas_incomplete_cholesky" 261 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 262 { 263 PetscInt jL; 264 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 265 PetscInt *ai=a->i,*aj=a->j; 266 MatScalar *aa=a->a; 267 PetscInt nrows, ncols; 268 PetscInt *max_row_nnz; 269 PetscErrorCode ierr; 270 spbas_matrix retval; 271 PetscScalar * diag; 272 PetscScalar * val; 273 PetscScalar * lvec; 274 PetscScalar epsdiag; 275 PetscInt i,j,k; 276 const PetscBool do_values = PETSC_TRUE; 277 PetscInt * r1_icol; 278 PetscScalar *r1_val; 279 PetscInt * r_icol; 280 PetscInt r_nnz; 281 PetscScalar *r_val; 282 PetscInt * A_icol; 283 PetscInt A_nnz; 284 PetscScalar *A_val; 285 PetscInt * p_icol; 286 PetscInt p_nnz; 287 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 288 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 289 290 PetscFunctionBegin; 291 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 292 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 293 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 294 epsdiag *= epsdiag_in / nrows; 295 ierr = PetscInfo2(PETSC_NULL," Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr); 296 297 if (droptol<1e-10) {droptol=1e-10;} 298 299 if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n"); 300 301 retval.nrows = nrows; 302 retval.ncols = nrows; 303 retval.nnz = pattern.nnz/10; 304 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 305 retval.block_data = PETSC_TRUE; 306 307 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 308 ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 309 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 310 retval.nnz = 0; 311 312 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr); 313 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr); 314 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr); 315 ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr); 316 if (!(diag && val && lvec && max_row_nnz)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n"); 317 318 ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 319 ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 320 ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 321 322 /* Check correct format of sparseness pattern */ 323 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 324 325 /* Count the nonzeros on transpose of pattern */ 326 for (i = 0; i<nrows; i++) { 327 p_nnz = pattern.row_nnz[i]; 328 p_icol = pattern.icols[i]; 329 for (j=0; j<p_nnz; j++) { 330 max_row_nnz[i+p_icol[j]]++; 331 } 332 } 333 334 /* Calculate rows of L */ 335 for (i = 0; i<nrows; i++) { 336 p_nnz = pattern.row_nnz[i]; 337 p_icol = pattern.icols[i]; 338 339 r_nnz = retval.row_nnz[i]; 340 r_icol = retval.icols[i]; 341 r_val = retval.values[i]; 342 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 343 A_icol = &aj[ai[rip[i]]]; 344 A_val = &aa[ai[rip[i]]]; 345 346 /* Calculate val += A(i,i:n)'; */ 347 for (j=0; j<A_nnz; j++) { 348 k = riip[A_icol[j]]; 349 if (k>=i) { val[k] = A_val[j]; } 350 } 351 352 /* Add regularization */ 353 val[i] += epsdiag; 354 355 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 356 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 357 for (j=0; j<r_nnz; j++) { 358 k = r_icol[j]; 359 lvec[k] = diag[k] * r_val[j]; 360 val[i] -= r_val[j] * lvec[k]; 361 } 362 363 /* Calculate the new diagonal */ 364 diag[i] = val[i]; 365 if (PetscRealPart(diag[i])<droptol) { 366 ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n"); 367 ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1); 368 369 /* Delete the whole matrix at once. */ 370 ierr = spbas_delete(retval); 371 return NEGATIVE_DIAGONAL; 372 } 373 374 /* If necessary, allocate arrays */ 375 if (r_nnz==0) { 376 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 377 if (ierr == PETSC_ERR_MEM) { 378 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 379 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 380 } 381 r_icol = retval.icols[i]; 382 r_val = retval.values[i]; 383 } 384 385 /* Now, fill in */ 386 r_icol[r_nnz] = i; 387 r_val [r_nnz] = 1.0; 388 r_nnz++; 389 retval.row_nnz[i]++; 390 391 retval.nnz += r_nnz; 392 393 /* Calculate 394 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 395 for (j=1; j<p_nnz; j++) { 396 k = i+p_icol[j]; 397 r1_icol = retval.icols[k]; 398 r1_val = retval.values[k]; 399 for (jL=0; jL<retval.row_nnz[k]; jL++) { 400 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 401 } 402 val[k] /= diag[i]; 403 404 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 405 /* If necessary, allocate arrays */ 406 if (retval.row_nnz[k]==0) { 407 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 408 if (ierr == PETSC_ERR_MEM) { 409 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 410 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 411 r_icol = retval.icols[i]; 412 r_val = retval.values[i]; 413 } 414 } 415 416 retval.icols[k][retval.row_nnz[k]] = i; 417 retval.values[k][retval.row_nnz[k]] = val[k]; 418 retval.row_nnz[k]++; 419 } 420 val[k] = 0; 421 } 422 423 /* Erase the values used in the work arrays */ 424 for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; } 425 } 426 427 ierr=PetscFree(lvec);CHKERRQ(ierr); 428 ierr=PetscFree(val);CHKERRQ(ierr); 429 430 ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 431 ierr=PetscFree(max_row_nnz);CHKERRQ(ierr); 432 433 /* Place the inverse of the diagonals in the matrix */ 434 for (i=0; i<nrows; i++) { 435 r_nnz = retval.row_nnz[i]; 436 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 437 for (j=0; j<r_nnz-1; j++) { 438 retval.values[i][j] *= -1; 439 } 440 } 441 ierr=PetscFree(diag);CHKERRQ(ierr); 442 *matrix_L = retval; 443 PetscFunctionReturn(0); 444 } 445 446