1 2 /* 3 spbas_cholesky_row_alloc: 4 in the data arrays, find a place where another row may be stored. 5 Return PETSC_ERR_MEM if there is insufficient space to store the 6 row, so garbage collection and/or re-allocation may be done. 7 */ 8 #undef __FUNCT__ 9 #define __FUNCT__ "spbas_cholesky_row_alloc" 10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11 { 12 PetscFunctionBegin; 13 retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14 retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15 *n_alloc_used += r_nnz; 16 if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 17 PetscFunctionReturn(0); 18 } 19 20 21 /* 22 spbas_cholesky_garbage_collect: 23 move the rows which have been calculated so far, as well as 24 those currently under construction, to the front of the 25 array, while putting them in the proper order. 26 When it seems necessary, enlarge the current arrays. 27 28 NB: re-allocation is being simulated using 29 PetscMalloc, memcpy, PetscFree, because 30 PetscRealloc does not seem to exist. 31 32 */ 33 #undef __FUNCT__ 34 #define __FUNCT__ "spbas_cholesky_garbage_collect" 35 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 36 Only the storage, not the contents of this matrix 37 is changed in this function */ 38 PetscInt i_row, /* I : Number of rows for which the final contents are known */ 39 PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 40 places in the arrays: they need not be moved any more */ 41 PetscInt *n_alloc_used, /* I/O: */ 42 PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 43 { 44 /* PSEUDO-CODE: 45 1. Choose the appropriate size for the arrays 46 2. Rescue the arrays which would be lost during garbage collection 47 3. Reallocate and correct administration 48 4. Move all arrays so that they are in proper order */ 49 50 PetscInt i,j; 51 PetscInt nrows = result->nrows; 52 PetscInt n_alloc_ok=0; 53 PetscInt n_alloc_ok_max=0; 54 PetscErrorCode ierr; 55 PetscInt need_already= 0; 56 PetscInt n_rows_ahead=0; 57 PetscInt max_need_extra= 0; 58 PetscInt n_alloc_max, n_alloc_est, n_alloc; 59 PetscInt n_alloc_now = result->n_alloc_icol; 60 PetscInt *alloc_icol_old = result->alloc_icol; 61 PetscScalar *alloc_val_old = result->alloc_val; 62 PetscInt *icol_rescue; 63 PetscScalar *val_rescue; 64 PetscInt n_rescue; 65 PetscInt n_row_rescue; 66 PetscInt i_here, i_last, n_copy; 67 const PetscReal xtra_perc = 20; 68 69 PetscFunctionBegin; 70 /********************************************************* 71 1. Choose appropriate array size 72 Count number of rows and memory usage which is already final */ 73 for (i=0;i<i_row; i++) { 74 n_alloc_ok += result->row_nnz[i]; 75 n_alloc_ok_max += max_row_nnz[i]; 76 } 77 78 /* Count currently needed memory usage and future memory requirements 79 (max, predicted)*/ 80 for (i=i_row; i<nrows; i++) { 81 if (!result->row_nnz[i]) { 82 max_need_extra += max_row_nnz[i]; 83 } else { 84 need_already += max_row_nnz[i]; 85 n_rows_ahead++; 86 } 87 } 88 89 /* Make maximal and realistic memory requirement estimates */ 90 n_alloc_max = n_alloc_ok + need_already + max_need_extra; 91 n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 92 93 /* Choose array sizes */ 94 if (n_alloc_max == n_alloc_est) { n_alloc = n_alloc_max; } 95 else if (n_alloc_now >= n_alloc_est) { n_alloc = n_alloc_now; } 96 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { n_alloc = n_alloc_max; } 97 else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); } 98 99 /* If new estimate is less than what we already have, 100 don't reallocate, just garbage-collect */ 101 if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 102 n_alloc = result->n_alloc_icol; 103 } 104 105 /* Motivate dimension choice */ 106 ierr = PetscInfo1(PETSC_NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 107 if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); } 108 else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); } 109 else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); } 110 else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); } 111 112 113 /********************************************************** 114 2. Rescue arrays which would be lost 115 Count how many rows and nonzeros will have to be rescued 116 when moving all arrays in place */ 117 n_row_rescue = 0; n_rescue = 0; 118 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 119 else { 120 i = *n_row_alloc_ok - 1; 121 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 122 } 123 124 for (i=*n_row_alloc_ok; i<nrows; i++) { 125 i_here = result->icols[i]-result->alloc_icol; 126 i_last = i_here + result->row_nnz[i]; 127 if (result->row_nnz[i]>0) { 128 if (*n_alloc_used > i_here || i_last > n_alloc) { 129 n_rescue += result->row_nnz[i]; 130 n_row_rescue++; 131 } 132 133 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 134 else { *n_alloc_used += max_row_nnz[i];} 135 } 136 } 137 138 /* Allocate rescue arrays */ 139 ierr = PetscMalloc(n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr); 140 ierr = PetscMalloc(n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr); 141 142 /* Rescue the arrays which need rescuing */ 143 n_row_rescue = 0; n_rescue = 0; 144 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 145 else { 146 i = *n_row_alloc_ok - 1; 147 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 148 } 149 150 for (i=*n_row_alloc_ok; i<nrows; i++) { 151 i_here = result->icols[i]-result->alloc_icol; 152 i_last = i_here + result->row_nnz[i]; 153 if (result->row_nnz[i]>0) { 154 if (*n_alloc_used > i_here || i_last > n_alloc) { 155 ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 156 ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 157 n_rescue += result->row_nnz[i]; 158 n_row_rescue++; 159 } 160 161 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 162 else { *n_alloc_used += max_row_nnz[i];} 163 } 164 } 165 166 /********************************************************** 167 3. Reallocate and correct administration */ 168 169 if (n_alloc != result->n_alloc_icol) { 170 n_copy = PetscMin(n_alloc,result->n_alloc_icol); 171 172 /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 173 174 Allocate new icol-data, copy old contents */ 175 ierr = PetscMalloc(n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr); 176 ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr); 177 178 /* Update administration, Reset pointers to new arrays */ 179 result->n_alloc_icol = n_alloc; 180 for (i=0; i<nrows; i++) { 181 result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 182 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 183 } 184 185 /* Delete old array */ 186 ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 187 188 /* Allocate new value-data, copy old contents */ 189 ierr = PetscMalloc(n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr); 190 ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr); 191 192 /* Update administration, Reset pointers to new arrays */ 193 result->n_alloc_val = n_alloc; 194 for (i=0; i<nrows; i++) { 195 result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 196 } 197 198 /* Delete old array */ 199 ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 200 } 201 202 203 /********************************************************* 204 4. Copy all the arrays to their proper places */ 205 n_row_rescue = 0; n_rescue = 0; 206 if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 207 else { 208 i = *n_row_alloc_ok - 1; 209 *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 210 } 211 212 for (i=*n_row_alloc_ok; i<nrows; i++) { 213 i_here = result->icols[i]-result->alloc_icol; 214 i_last = i_here + result->row_nnz[i]; 215 216 result->icols[i] = result->alloc_icol + *n_alloc_used; 217 result->values[i]= result->alloc_val + *n_alloc_used; 218 219 if (result->row_nnz[i]>0) { 220 if (*n_alloc_used > i_here || i_last > n_alloc) { 221 ierr = PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 222 ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 223 n_rescue += result->row_nnz[i]; 224 n_row_rescue++; 225 } else { 226 for (j=0; j<result->row_nnz[i]; j++) { 227 result->icols[i][j] = result->alloc_icol[i_here+j]; 228 result->values[i][j] = result->alloc_val[ i_here+j]; 229 } 230 } 231 if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 232 else { *n_alloc_used += max_row_nnz[i];} 233 } 234 } 235 236 /* Delete the rescue arrays */ 237 ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 238 ierr = PetscFree(val_rescue);CHKERRQ(ierr); 239 *n_row_alloc_ok = i_row; 240 PetscFunctionReturn(0); 241 } 242 243 /* 244 spbas_incomplete_cholesky: 245 incomplete Cholesky decomposition of a square, symmetric, 246 positive definite matrix. 247 248 In case negative diagonals are encountered, function returns 249 NEGATIVE_DIAGONAL. When this happens, call this function again 250 with a larger epsdiag_in, a less sparse pattern, and/or a smaller 251 droptol 252 */ 253 #undef __FUNCT__ 254 #define __FUNCT__ "spbas_incomplete_cholesky" 255 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 256 { 257 PetscInt jL; 258 Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 259 PetscInt *ai=a->i,*aj=a->j; 260 MatScalar *aa=a->a; 261 PetscInt nrows, ncols; 262 PetscInt *max_row_nnz; 263 PetscErrorCode ierr; 264 spbas_matrix retval; 265 PetscScalar * diag; 266 PetscScalar * val; 267 PetscScalar * lvec; 268 PetscScalar epsdiag; 269 PetscInt i,j,k; 270 const PetscBool do_values = PETSC_TRUE; 271 PetscInt * r1_icol; 272 PetscScalar *r1_val; 273 PetscInt * r_icol; 274 PetscInt r_nnz; 275 PetscScalar *r_val; 276 PetscInt * A_icol; 277 PetscInt A_nnz; 278 PetscScalar *A_val; 279 PetscInt * p_icol; 280 PetscInt p_nnz; 281 PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 282 PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 283 284 PetscFunctionBegin; 285 /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 286 ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 287 ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 288 epsdiag *= epsdiag_in / nrows; 289 ierr = PetscInfo2(PETSC_NULL," Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr); 290 291 if (droptol<1e-10) {droptol=1e-10;} 292 293 if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n"); 294 295 retval.nrows = nrows; 296 retval.ncols = nrows; 297 retval.nnz = pattern.nnz/10; 298 retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 299 retval.block_data = PETSC_TRUE; 300 301 ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 302 ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 303 ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 304 retval.nnz = 0; 305 306 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr); 307 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr); 308 ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr); 309 ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr); 310 if (!(diag && val && lvec && max_row_nnz)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n"); 311 312 ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 313 ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 314 ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 315 316 /* Check correct format of sparseness pattern */ 317 if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 318 319 /* Count the nonzeros on transpose of pattern */ 320 for (i = 0; i<nrows; i++) { 321 p_nnz = pattern.row_nnz[i]; 322 p_icol = pattern.icols[i]; 323 for (j=0; j<p_nnz; j++) { 324 max_row_nnz[i+p_icol[j]]++; 325 } 326 } 327 328 /* Calculate rows of L */ 329 for (i = 0; i<nrows; i++) { 330 p_nnz = pattern.row_nnz[i]; 331 p_icol = pattern.icols[i]; 332 333 r_nnz = retval.row_nnz[i]; 334 r_icol = retval.icols[i]; 335 r_val = retval.values[i]; 336 A_nnz = ai[rip[i]+1] - ai[rip[i]]; 337 A_icol = &aj[ai[rip[i]]]; 338 A_val = &aa[ai[rip[i]]]; 339 340 /* Calculate val += A(i,i:n)'; */ 341 for (j=0; j<A_nnz; j++) { 342 k = riip[A_icol[j]]; 343 if (k>=i) { val[k] = A_val[j]; } 344 } 345 346 /* Add regularization */ 347 val[i] += epsdiag; 348 349 /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 350 val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 351 for (j=0; j<r_nnz; j++) { 352 k = r_icol[j]; 353 lvec[k] = diag[k] * r_val[j]; 354 val[i] -= r_val[j] * lvec[k]; 355 } 356 357 /* Calculate the new diagonal */ 358 diag[i] = val[i]; 359 if (PetscRealPart(diag[i])<droptol) { 360 ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n"); 361 ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1); 362 363 /* Delete the whole matrix at once. */ 364 ierr = spbas_delete(retval); 365 return NEGATIVE_DIAGONAL; 366 } 367 368 /* If necessary, allocate arrays */ 369 if (r_nnz==0) { 370 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 371 if (ierr == PETSC_ERR_MEM) { 372 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 373 ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 374 } 375 r_icol = retval.icols[i]; 376 r_val = retval.values[i]; 377 } 378 379 /* Now, fill in */ 380 r_icol[r_nnz] = i; 381 r_val [r_nnz] = 1.0; 382 r_nnz++; 383 retval.row_nnz[i]++; 384 385 retval.nnz += r_nnz; 386 387 /* Calculate 388 val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 389 for (j=1; j<p_nnz; j++) { 390 k = i+p_icol[j]; 391 r1_icol = retval.icols[k]; 392 r1_val = retval.values[k]; 393 for (jL=0; jL<retval.row_nnz[k]; jL++) { 394 val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 395 } 396 val[k] /= diag[i]; 397 398 if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 399 /* If necessary, allocate arrays */ 400 if (retval.row_nnz[k]==0) { 401 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 402 if (ierr == PETSC_ERR_MEM) { 403 ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 404 ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 405 r_icol = retval.icols[i]; 406 r_val = retval.values[i]; 407 } 408 } 409 410 retval.icols[k][retval.row_nnz[k]] = i; 411 retval.values[k][retval.row_nnz[k]] = val[k]; 412 retval.row_nnz[k]++; 413 } 414 val[k] = 0; 415 } 416 417 /* Erase the values used in the work arrays */ 418 for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; } 419 } 420 421 ierr=PetscFree(lvec);CHKERRQ(ierr); 422 ierr=PetscFree(val);CHKERRQ(ierr); 423 424 ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 425 ierr=PetscFree(max_row_nnz);CHKERRQ(ierr); 426 427 /* Place the inverse of the diagonals in the matrix */ 428 for (i=0; i<nrows; i++) { 429 r_nnz = retval.row_nnz[i]; 430 retval.values[i][r_nnz-1] = 1.0 / diag[i]; 431 for (j=0; j<r_nnz-1; j++) { 432 retval.values[i][j] *= -1; 433 } 434 } 435 ierr=PetscFree(diag);CHKERRQ(ierr); 436 *matrix_L = retval; 437 PetscFunctionReturn(0); 438 } 439 440