1b2f1ab58SBarry Smith 2b2f1ab58SBarry Smith /* 3b2f1ab58SBarry Smith spbas_cholesky_row_alloc: 4b2f1ab58SBarry Smith in the data arrays, find a place where another row may be stored. 5b2f1ab58SBarry Smith Return PETSC_ERR_MEM if there is insufficient space to store the 6b2f1ab58SBarry Smith row, so garbage collection and/or re-allocation may be done. 7b2f1ab58SBarry Smith */ 8b2f1ab58SBarry Smith #undef __FUNCT__ 9b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_row_alloc" 10b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11b2f1ab58SBarry Smith { 12b2f1ab58SBarry Smith PetscFunctionBegin; 13b2f1ab58SBarry Smith retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14b2f1ab58SBarry Smith retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15b2f1ab58SBarry Smith *n_alloc_used += r_nnz; 16b2f1ab58SBarry Smith if (*n_alloc_used > retval.n_alloc_icol) 17b2f1ab58SBarry Smith { 18b2f1ab58SBarry Smith PetscFunctionReturn(PETSC_ERR_MEM); 19b2f1ab58SBarry Smith } 20b2f1ab58SBarry Smith else 21b2f1ab58SBarry Smith { 22b2f1ab58SBarry Smith PetscFunctionReturn(0); 23b2f1ab58SBarry Smith } 24b2f1ab58SBarry Smith } 25b2f1ab58SBarry Smith 26b2f1ab58SBarry Smith 27b2f1ab58SBarry Smith /* 28b2f1ab58SBarry Smith spbas_cholesky_garbage_collect: 29b2f1ab58SBarry Smith move the rows which have been calculated so far, as well as 30b2f1ab58SBarry Smith those currently under construction, to the front of the 31b2f1ab58SBarry Smith array, while putting them in the proper order. 32b2f1ab58SBarry Smith When it seems necessary, enlarge the current arrays. 33b2f1ab58SBarry Smith 34b2f1ab58SBarry Smith NB: re-allocation is being simulated using 35b2f1ab58SBarry Smith PetscMalloc, memcpy, PetscFree, because 36b2f1ab58SBarry Smith PetscRealloc does not seem to exist. 37b2f1ab58SBarry Smith 38b2f1ab58SBarry Smith */ 39b2f1ab58SBarry Smith #undef __FUNCT__ 40b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_garbage_collect" 41b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_garbage_collect( 42c328eeadSBarry Smith spbas_matrix * result, /* I/O: the Cholesky factor matrix being constructed. 43c328eeadSBarry Smith Only the storage, not the contents of this matrix 44c328eeadSBarry Smith is changed in this function */ 45c328eeadSBarry Smith PetscInt i_row, /* I : Number of rows for which the final contents are 46c328eeadSBarry Smith known */ 47c328eeadSBarry Smith PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 48c328eeadSBarry Smith places in the arrays: they need not be moved any more */ 49c328eeadSBarry Smith PetscInt *n_alloc_used, /* I/O: */ 50c328eeadSBarry Smith PetscInt *max_row_nnz /*I : Over-estimate of the number of nonzeros needed to 51c328eeadSBarry Smith store each row */ 52b2f1ab58SBarry Smith ) 53b2f1ab58SBarry Smith { 54b2f1ab58SBarry Smith 55b2f1ab58SBarry Smith 56c328eeadSBarry Smith /* PSEUDO-CODE: 57c328eeadSBarry Smith 1. Choose the appropriate size for the arrays 58c328eeadSBarry Smith 2. Rescue the arrays which would be lost during garbage collection 59c328eeadSBarry Smith 3. Reallocate and correct administration 60c328eeadSBarry Smith 4. Move all arrays so that they are in proper order */ 61b2f1ab58SBarry Smith 62b2f1ab58SBarry Smith PetscInt i,j; 63b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 64b2f1ab58SBarry Smith PetscInt n_alloc_ok=0; 65b2f1ab58SBarry Smith PetscInt n_alloc_ok_max=0; 66b2f1ab58SBarry Smith 67b2f1ab58SBarry Smith PetscErrorCode ierr; 68b2f1ab58SBarry Smith PetscInt need_already= 0; 69b2f1ab58SBarry Smith PetscInt n_rows_ahead=0; 70b2f1ab58SBarry Smith PetscInt max_need_extra= 0; 71b2f1ab58SBarry Smith PetscInt n_alloc_max, n_alloc_est, n_alloc; 72b2f1ab58SBarry Smith PetscInt n_alloc_now = result->n_alloc_icol; 73b2f1ab58SBarry Smith PetscInt *alloc_icol_old = result->alloc_icol; 74b2f1ab58SBarry Smith PetscScalar *alloc_val_old = result->alloc_val; 75b2f1ab58SBarry Smith PetscInt *icol_rescue; 76b2f1ab58SBarry Smith PetscScalar *val_rescue; 77b2f1ab58SBarry Smith PetscInt n_rescue; 78b2f1ab58SBarry Smith PetscInt n_row_rescue; 79b2f1ab58SBarry Smith PetscInt i_here, i_last, n_copy; 80d36aa34eSBarry Smith const PetscReal xtra_perc = 20; 81b2f1ab58SBarry Smith 82b2f1ab58SBarry Smith PetscFunctionBegin; 83b2f1ab58SBarry Smith 84c328eeadSBarry Smith /********************************************************* 85c328eeadSBarry Smith 1. Choose appropriate array size 86c328eeadSBarry Smith Count number of rows and memory usage which is already final */ 87b2f1ab58SBarry Smith for (i=0;i<i_row; i++) 88b2f1ab58SBarry Smith { 89b2f1ab58SBarry Smith n_alloc_ok += result->row_nnz[i]; 90b2f1ab58SBarry Smith n_alloc_ok_max += max_row_nnz[i]; 91b2f1ab58SBarry Smith } 92b2f1ab58SBarry Smith 93c328eeadSBarry Smith /* Count currently needed memory usage and future memory requirements 94c328eeadSBarry Smith (max, predicted)*/ 95b2f1ab58SBarry Smith for (i=i_row; i<nrows; i++) 96b2f1ab58SBarry Smith { 97b2f1ab58SBarry Smith if (result->row_nnz[i]==0) 98b2f1ab58SBarry Smith { 99b2f1ab58SBarry Smith max_need_extra += max_row_nnz[i]; 100b2f1ab58SBarry Smith } 101b2f1ab58SBarry Smith else 102b2f1ab58SBarry Smith { 103b2f1ab58SBarry Smith need_already += max_row_nnz[i]; 104b2f1ab58SBarry Smith n_rows_ahead++; 105b2f1ab58SBarry Smith } 106b2f1ab58SBarry Smith } 107b2f1ab58SBarry Smith 108c328eeadSBarry Smith /* Make maximal and realistic memory requirement estimates */ 109b2f1ab58SBarry Smith n_alloc_max = n_alloc_ok + need_already + max_need_extra; 110d36aa34eSBarry Smith n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * 111d36aa34eSBarry Smith ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 112b2f1ab58SBarry Smith 113c328eeadSBarry Smith /* Choose array sizes */ 114b2f1ab58SBarry Smith if (n_alloc_max == n_alloc_est) 115b2f1ab58SBarry Smith { n_alloc = n_alloc_max; } 116b2f1ab58SBarry Smith else if (n_alloc_now >= n_alloc_est) 117b2f1ab58SBarry Smith { n_alloc = n_alloc_now; } 118b2f1ab58SBarry Smith else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) 119b2f1ab58SBarry Smith { n_alloc = n_alloc_max; } 120b2f1ab58SBarry Smith else 121b2f1ab58SBarry Smith { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); } 122b2f1ab58SBarry Smith 123c328eeadSBarry Smith /* If new estimate is less than what we already have, 124c328eeadSBarry Smith don't reallocate, just garbage-collect */ 125b2f1ab58SBarry Smith if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) 126b2f1ab58SBarry Smith { 127b2f1ab58SBarry Smith n_alloc = result->n_alloc_icol; 128b2f1ab58SBarry Smith } 129b2f1ab58SBarry Smith 130c328eeadSBarry Smith /* Motivate dimension choice */ 131c328eeadSBarry Smith ierr = PetscInfo1(PETSC_NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 132b2f1ab58SBarry Smith if (n_alloc_max == n_alloc_est) 133c328eeadSBarry Smith { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); } 134b2f1ab58SBarry Smith else if (n_alloc_now >= n_alloc_est) 135c328eeadSBarry Smith { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); } 136b2f1ab58SBarry Smith else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) 137c328eeadSBarry Smith { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); } 138b2f1ab58SBarry Smith else 139c328eeadSBarry Smith { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); } 140b2f1ab58SBarry Smith 141b2f1ab58SBarry Smith 142c328eeadSBarry Smith /********************************************************** 143c328eeadSBarry Smith 2. Rescue arrays which would be lost 144c328eeadSBarry Smith Count how many rows and nonzeros will have to be rescued 145c328eeadSBarry Smith when moving all arrays in place */ 146b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 147b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 148b2f1ab58SBarry Smith else 149b2f1ab58SBarry Smith { 150b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 151b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 152b2f1ab58SBarry Smith result->row_nnz[i]; 153b2f1ab58SBarry Smith } 154b2f1ab58SBarry Smith 155b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 156b2f1ab58SBarry Smith { 157b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 158b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 159b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 160b2f1ab58SBarry Smith { 161b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 162b2f1ab58SBarry Smith { 163b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 164b2f1ab58SBarry Smith n_row_rescue++; 165b2f1ab58SBarry Smith } 166b2f1ab58SBarry Smith 167b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 168b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 169b2f1ab58SBarry Smith } 170b2f1ab58SBarry Smith } 171b2f1ab58SBarry Smith 172c328eeadSBarry Smith /* Allocate rescue arrays */ 173b2f1ab58SBarry Smith ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue); 174b2f1ab58SBarry Smith CHKERRQ(ierr); 175b2f1ab58SBarry Smith ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue); 176b2f1ab58SBarry Smith CHKERRQ(ierr); 177b2f1ab58SBarry Smith 178c328eeadSBarry Smith /* Rescue the arrays which need rescuing */ 179b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 180b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 181b2f1ab58SBarry Smith else 182b2f1ab58SBarry Smith { 183b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 184b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 185b2f1ab58SBarry Smith result->row_nnz[i]; 186b2f1ab58SBarry Smith } 187b2f1ab58SBarry Smith 188b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 189b2f1ab58SBarry Smith { 190b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 191b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 192b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 193b2f1ab58SBarry Smith { 194b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 195b2f1ab58SBarry Smith { 196b2f1ab58SBarry Smith memcpy((void*) &icol_rescue[n_rescue], 197b2f1ab58SBarry Smith (void*) result->icols[i], 198b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(int)); 199b2f1ab58SBarry Smith memcpy((void*) &val_rescue[n_rescue], 200b2f1ab58SBarry Smith (void*) result->values[i], 201b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(PetscScalar)); 202b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 203b2f1ab58SBarry Smith n_row_rescue++; 204b2f1ab58SBarry Smith } 205b2f1ab58SBarry Smith 206b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 207b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 208b2f1ab58SBarry Smith } 209b2f1ab58SBarry Smith } 210b2f1ab58SBarry Smith 211c328eeadSBarry Smith /********************************************************** 212c328eeadSBarry Smith 3. Reallocate and correct administration */ 213b2f1ab58SBarry Smith 214b2f1ab58SBarry Smith if (n_alloc != result->n_alloc_icol) 215b2f1ab58SBarry Smith { 216b2f1ab58SBarry Smith n_copy = MIN(n_alloc,result->n_alloc_icol); 217b2f1ab58SBarry Smith 218c328eeadSBarry Smith /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 219b2f1ab58SBarry Smith 220c328eeadSBarry Smith Allocate new icol-data, copy old contents */ 221b2f1ab58SBarry Smith ierr = PetscMalloc( n_alloc * sizeof(int), 222b2f1ab58SBarry Smith &result->alloc_icol); CHKERRQ(ierr); 223b2f1ab58SBarry Smith memcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int)); 224b2f1ab58SBarry Smith 225c328eeadSBarry Smith /* Update administration, Reset pointers to new arrays */ 226b2f1ab58SBarry Smith result->n_alloc_icol = n_alloc; 227b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 228b2f1ab58SBarry Smith { 229b2f1ab58SBarry Smith result->icols[i] = result->alloc_icol + 230b2f1ab58SBarry Smith (result->icols[i] - alloc_icol_old); 231b2f1ab58SBarry Smith result->values[i] = result->alloc_val + 232b2f1ab58SBarry Smith (result->icols[i] - result->alloc_icol); 233b2f1ab58SBarry Smith } 234b2f1ab58SBarry Smith 235c328eeadSBarry Smith /* Delete old array */ 236b2f1ab58SBarry Smith ierr = PetscFree(alloc_icol_old); CHKERRQ(ierr); 237b2f1ab58SBarry Smith 238c328eeadSBarry Smith /* Allocate new value-data, copy old contents */ 239b2f1ab58SBarry Smith ierr = PetscMalloc( n_alloc * sizeof(PetscScalar), 240b2f1ab58SBarry Smith &result->alloc_val); CHKERRQ(ierr); 241b2f1ab58SBarry Smith memcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar)); 242b2f1ab58SBarry Smith 243c328eeadSBarry Smith /* Update administration, Reset pointers to new arrays */ 244b2f1ab58SBarry Smith result->n_alloc_val = n_alloc; 245b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 246b2f1ab58SBarry Smith { 247b2f1ab58SBarry Smith result->values[i] = result->alloc_val + 248b2f1ab58SBarry Smith (result->icols[i] - result->alloc_icol); 249b2f1ab58SBarry Smith } 250b2f1ab58SBarry Smith 251c328eeadSBarry Smith /* Delete old array */ 252b2f1ab58SBarry Smith ierr = PetscFree(alloc_val_old); CHKERRQ(ierr); 253b2f1ab58SBarry Smith } 254b2f1ab58SBarry Smith 255b2f1ab58SBarry Smith 256c328eeadSBarry Smith /********************************************************* 257c328eeadSBarry Smith 4. Copy all the arrays to their proper places */ 258b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 259b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 260b2f1ab58SBarry Smith else 261b2f1ab58SBarry Smith { 262b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 263b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 264b2f1ab58SBarry Smith result->row_nnz[i]; 265b2f1ab58SBarry Smith } 266b2f1ab58SBarry Smith 267b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 268b2f1ab58SBarry Smith { 269b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 270b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 271b2f1ab58SBarry Smith 272b2f1ab58SBarry Smith result->icols[i] = result->alloc_icol + *n_alloc_used; 273b2f1ab58SBarry Smith result->values[i]= result->alloc_val + *n_alloc_used; 274b2f1ab58SBarry Smith 275b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 276b2f1ab58SBarry Smith { 277b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 278b2f1ab58SBarry Smith { 279b2f1ab58SBarry Smith memcpy((void*) result->icols[i], 280b2f1ab58SBarry Smith (void*) &icol_rescue[n_rescue], 281b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(int)); 282b2f1ab58SBarry Smith memcpy((void*) result->values[i], 283b2f1ab58SBarry Smith (void*) &val_rescue[n_rescue], 284b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(PetscScalar)); 285b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 286b2f1ab58SBarry Smith n_row_rescue++; 287b2f1ab58SBarry Smith } 288b2f1ab58SBarry Smith else 289b2f1ab58SBarry Smith { 290b2f1ab58SBarry Smith for (j=0; j<result->row_nnz[i]; j++) 291b2f1ab58SBarry Smith { 292b2f1ab58SBarry Smith result->icols[i][j] = result->alloc_icol[i_here+j]; 293b2f1ab58SBarry Smith result->values[i][j] = result->alloc_val[ i_here+j]; 294b2f1ab58SBarry Smith } 295b2f1ab58SBarry Smith } 296b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 297b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 298b2f1ab58SBarry Smith } 299b2f1ab58SBarry Smith } 300b2f1ab58SBarry Smith 301c328eeadSBarry Smith /* Delete the rescue arrays */ 302b2f1ab58SBarry Smith ierr = PetscFree(icol_rescue); CHKERRQ(ierr); 303b2f1ab58SBarry Smith ierr = PetscFree(val_rescue); CHKERRQ(ierr); 304b2f1ab58SBarry Smith *n_row_alloc_ok = i_row; 305b2f1ab58SBarry Smith 306b2f1ab58SBarry Smith PetscFunctionReturn(0); 307b2f1ab58SBarry Smith 308b2f1ab58SBarry Smith } 309b2f1ab58SBarry Smith 310b2f1ab58SBarry Smith /* 311b2f1ab58SBarry Smith spbas_incomplete_cholesky: 312b2f1ab58SBarry Smith incomplete Cholesky decomposition of a square, symmetric, 313b2f1ab58SBarry Smith positive definite matrix. 314b2f1ab58SBarry Smith 315b2f1ab58SBarry Smith In case negative diagonals are encountered, function returns 316b2f1ab58SBarry Smith NEGATIVE_DIAGONAL. When this happens, call this function again 317b2f1ab58SBarry Smith with a larger epsdiag_in, a less sparse pattern, and/or a smaller 318b2f1ab58SBarry Smith droptol 319b2f1ab58SBarry Smith */ 320b2f1ab58SBarry Smith #undef __FUNCT__ 321b2f1ab58SBarry Smith #define __FUNCT__ "spbas_incomplete_cholesky" 322b2f1ab58SBarry Smith PetscErrorCode spbas_incomplete_cholesky( 323b2f1ab58SBarry Smith Mat A, const PetscInt *rip, const PetscInt *riip, 324b2f1ab58SBarry Smith spbas_matrix pattern, 325d36aa34eSBarry Smith PetscReal droptol, PetscReal epsdiag_in, 326b2f1ab58SBarry Smith spbas_matrix * matrix_L) 327b2f1ab58SBarry Smith { 328b2f1ab58SBarry Smith PetscInt jL; 329b2f1ab58SBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 330b2f1ab58SBarry Smith PetscInt *ai=a->i,*aj=a->j; 331b2f1ab58SBarry Smith MatScalar *aa=a->a; 332b2f1ab58SBarry Smith PetscInt nrows, ncols; 333b2f1ab58SBarry Smith PetscInt *max_row_nnz; 334b2f1ab58SBarry Smith PetscErrorCode ierr; 335b2f1ab58SBarry Smith spbas_matrix retval; 336b2f1ab58SBarry Smith PetscScalar * diag; 337b2f1ab58SBarry Smith PetscScalar * val; 338b2f1ab58SBarry Smith PetscScalar * lvec; 339b2f1ab58SBarry Smith PetscScalar epsdiag; 340b2f1ab58SBarry Smith PetscInt i,j,k; 341b2f1ab58SBarry Smith const PetscTruth do_values = PETSC_TRUE; 342b2f1ab58SBarry Smith PetscInt * r1_icol; PetscScalar *r1_val; 343b2f1ab58SBarry Smith PetscInt * r_icol; PetscInt r_nnz; PetscScalar *r_val; 344b2f1ab58SBarry Smith PetscInt * A_icol; PetscInt A_nnz; PetscScalar *A_val; 345b2f1ab58SBarry Smith PetscInt * p_icol; PetscInt p_nnz; 346c328eeadSBarry Smith PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored 347c328eeadSBarry Smith correctly in the matrix */ 348c328eeadSBarry Smith PetscInt n_alloc_used = 0; /* part of result->icols and result->values 349c328eeadSBarry Smith which is currently being used */ 350b2f1ab58SBarry Smith PetscFunctionBegin; 351b2f1ab58SBarry Smith 352c328eeadSBarry Smith /* Convert the Manteuffel shift from 'fraction of average diagonal' to 353c328eeadSBarry Smith dimensioned value */ 354b2f1ab58SBarry Smith ierr = MatGetSize(A, &nrows, &ncols); CHKERRQ(ierr); 355b2f1ab58SBarry Smith ierr = MatGetTrace(A, &epsdiag); CHKERRQ(ierr); 356b2f1ab58SBarry Smith epsdiag *= epsdiag_in / nrows; 3579767453cSBarry Smith ierr = PetscInfo2(PETSC_NULL," Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr); 358b2f1ab58SBarry Smith 359b2f1ab58SBarry Smith if (droptol<1e-10) {droptol=1e-10;} 360b2f1ab58SBarry Smith 361b2f1ab58SBarry Smith if ( (nrows != pattern.nrows) || 362b2f1ab58SBarry Smith (ncols != pattern.ncols) || 363b2f1ab58SBarry Smith (ncols != nrows) ) 364b2f1ab58SBarry Smith { 365b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_ARG_INCOMP, 366b2f1ab58SBarry Smith "Dimension error in spbas_incomplete_cholesky\n"); 367b2f1ab58SBarry Smith } 368b2f1ab58SBarry Smith 369b2f1ab58SBarry Smith retval.nrows = nrows; 370b2f1ab58SBarry Smith retval.ncols = nrows; 371b2f1ab58SBarry Smith retval.nnz = pattern.nnz/10; 372b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 373b2f1ab58SBarry Smith retval.block_data = PETSC_TRUE; 374b2f1ab58SBarry Smith 375b2f1ab58SBarry Smith ierr = spbas_allocate_pattern(&retval, do_values); CHKERRQ(ierr); 376b2f1ab58SBarry Smith memset((void*) retval.row_nnz, 0, nrows*sizeof(int)); 377b2f1ab58SBarry Smith ierr = spbas_allocate_data(&retval); CHKERRQ(ierr); 378b2f1ab58SBarry Smith retval.nnz = 0; 379b2f1ab58SBarry Smith 380b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag); CHKERRQ(ierr); 381b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val); CHKERRQ(ierr); 382b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec); CHKERRQ(ierr); 383b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz); CHKERRQ(ierr); 384b2f1ab58SBarry Smith if (!(diag && val && lvec && max_row_nnz)) 385b2f1ab58SBarry Smith { 386b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_MEM, 387b2f1ab58SBarry Smith "Allocation error in spbas_incomplete_cholesky\n"); 388b2f1ab58SBarry Smith } 389b2f1ab58SBarry Smith 390b2f1ab58SBarry Smith memset((void*) val, 0, nrows*sizeof(PetscScalar)); 391b2f1ab58SBarry Smith memset((void*) lvec, 0, nrows*sizeof(PetscScalar)); 392b2f1ab58SBarry Smith memset((void*) max_row_nnz, 0, nrows*sizeof(int)); 393b2f1ab58SBarry Smith 394c328eeadSBarry Smith /* Check correct format of sparseness pattern */ 395b2f1ab58SBarry Smith if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) 396b2f1ab58SBarry Smith { 397b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_SUP_SYS, 398b2f1ab58SBarry Smith "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 399b2f1ab58SBarry Smith } 400b2f1ab58SBarry Smith 401c328eeadSBarry Smith /* Count the nonzeros on transpose of pattern */ 402b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 403b2f1ab58SBarry Smith { 404b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 405b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 406b2f1ab58SBarry Smith for (j=0; j<p_nnz; j++) 407b2f1ab58SBarry Smith { 408b2f1ab58SBarry Smith max_row_nnz[i+p_icol[j]]++; 409b2f1ab58SBarry Smith } 410b2f1ab58SBarry Smith } 411b2f1ab58SBarry Smith 412c328eeadSBarry Smith /* Calculate rows of L */ 413b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 414b2f1ab58SBarry Smith { 415b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 416b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 417b2f1ab58SBarry Smith 418b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 419b2f1ab58SBarry Smith r_icol = retval.icols[i]; 420b2f1ab58SBarry Smith r_val = retval.values[i]; 421b2f1ab58SBarry Smith A_nnz = ai[rip[i]+1] - ai[rip[i]]; 422b2f1ab58SBarry Smith A_icol = &aj[ai[rip[i]]]; 423b2f1ab58SBarry Smith A_val = &aa[ai[rip[i]]]; 424b2f1ab58SBarry Smith 425c328eeadSBarry Smith /* Calculate val += A(i,i:n)'; */ 426b2f1ab58SBarry Smith for (j=0; j<A_nnz; j++) 427b2f1ab58SBarry Smith { 428b2f1ab58SBarry Smith k = riip[A_icol[j]]; 429b2f1ab58SBarry Smith if (k>=i) { val[k] = A_val[j]; } 430b2f1ab58SBarry Smith } 431b2f1ab58SBarry Smith 432c328eeadSBarry Smith /* Add regularization */ 433b2f1ab58SBarry Smith val[i] += epsdiag; 434b2f1ab58SBarry Smith 435c328eeadSBarry Smith /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 436c328eeadSBarry Smith val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 437b2f1ab58SBarry Smith for ( j=0; j<r_nnz; j++) 438b2f1ab58SBarry Smith { 439b2f1ab58SBarry Smith k = r_icol[j]; 440b2f1ab58SBarry Smith lvec[k] = diag[k] * r_val[j]; 441b2f1ab58SBarry Smith val[i] -= r_val[j] * lvec[k]; 442b2f1ab58SBarry Smith } 443b2f1ab58SBarry Smith 444c328eeadSBarry Smith /* Calculate the new diagonal */ 445b2f1ab58SBarry Smith diag[i] = val[i]; 4469767453cSBarry Smith if (PetscRealPart(diag[i])<droptol) 447b2f1ab58SBarry Smith { 448c328eeadSBarry Smith ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n"); 449c328eeadSBarry Smith ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1); 450b2f1ab58SBarry Smith 451c328eeadSBarry Smith /* Delete the whole matrix at once. */ 452b2f1ab58SBarry Smith ierr = spbas_delete(retval); 453b2f1ab58SBarry Smith return NEGATIVE_DIAGONAL; 454b2f1ab58SBarry Smith } 455b2f1ab58SBarry Smith 456c328eeadSBarry Smith /* If necessary, allocate arrays */ 457b2f1ab58SBarry Smith if (r_nnz==0) 458b2f1ab58SBarry Smith { 459b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 460b2f1ab58SBarry Smith if (ierr == PETSC_ERR_MEM) 461b2f1ab58SBarry Smith { 462b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, 463b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 464b2f1ab58SBarry Smith CHKERRQ(ierr); 465b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 466b2f1ab58SBarry Smith } 467b2f1ab58SBarry Smith CHKERRQ(ierr); 468b2f1ab58SBarry Smith r_icol = retval.icols[i]; 469b2f1ab58SBarry Smith r_val = retval.values[i]; 470b2f1ab58SBarry Smith } 471b2f1ab58SBarry Smith 472c328eeadSBarry Smith /* Now, fill in */ 473b2f1ab58SBarry Smith r_icol[r_nnz] = i; 474b2f1ab58SBarry Smith r_val [r_nnz] = 1.0; 475b2f1ab58SBarry Smith r_nnz++; 476b2f1ab58SBarry Smith retval.row_nnz[i]++; 477b2f1ab58SBarry Smith 478b2f1ab58SBarry Smith retval.nnz += r_nnz; 479b2f1ab58SBarry Smith 480c328eeadSBarry Smith /* Calculate 481c328eeadSBarry Smith val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 482b2f1ab58SBarry Smith for (j=1; j<p_nnz; j++) 483b2f1ab58SBarry Smith { 484b2f1ab58SBarry Smith k = i+p_icol[j]; 485b2f1ab58SBarry Smith r1_icol = retval.icols[k]; 486b2f1ab58SBarry Smith r1_val = retval.values[k]; 487b2f1ab58SBarry Smith for (jL=0; jL<retval.row_nnz[k]; jL++) 488b2f1ab58SBarry Smith { 489b2f1ab58SBarry Smith val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 490b2f1ab58SBarry Smith } 491b2f1ab58SBarry Smith val[k] /= diag[i]; 492b2f1ab58SBarry Smith 493d36aa34eSBarry Smith if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) 494b2f1ab58SBarry Smith { 495c328eeadSBarry Smith /* If necessary, allocate arrays */ 496b2f1ab58SBarry Smith if (retval.row_nnz[k]==0) 497b2f1ab58SBarry Smith { 498b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], 499b2f1ab58SBarry Smith &n_alloc_used); 500b2f1ab58SBarry Smith if (ierr == PETSC_ERR_MEM) 501b2f1ab58SBarry Smith { 502b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( 503b2f1ab58SBarry Smith &retval, i, &n_row_alloc_ok, 504b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 505b2f1ab58SBarry Smith CHKERRQ(ierr); 506b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], 507b2f1ab58SBarry Smith &n_alloc_used); 508b2f1ab58SBarry Smith r_icol = retval.icols[i]; 509b2f1ab58SBarry Smith r_val = retval.values[i]; 510b2f1ab58SBarry Smith } 511b2f1ab58SBarry Smith CHKERRQ(ierr); 512b2f1ab58SBarry Smith } 513b2f1ab58SBarry Smith 514b2f1ab58SBarry Smith retval.icols[k][retval.row_nnz[k]] = i; 515b2f1ab58SBarry Smith retval.values[k][retval.row_nnz[k]] = val[k]; 516b2f1ab58SBarry Smith retval.row_nnz[k]++; 517b2f1ab58SBarry Smith } 518b2f1ab58SBarry Smith val[k] = 0; 519b2f1ab58SBarry Smith } 520b2f1ab58SBarry Smith 521*71d55d18SBarry Smith /*Erase the values used in the work arrays */ 522b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; } 523b2f1ab58SBarry Smith } 524b2f1ab58SBarry Smith 525b2f1ab58SBarry Smith ierr=PetscFree(lvec); CHKERRQ(ierr); ierr=PetscFree(val); CHKERRQ(ierr); 526b2f1ab58SBarry Smith 527b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, 528b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 529b2f1ab58SBarry Smith ierr=PetscFree(max_row_nnz); CHKERRQ(ierr); 530b2f1ab58SBarry Smith 531c328eeadSBarry Smith /* Place the inverse of the diagonals in the matrix */ 532b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 533b2f1ab58SBarry Smith { 534b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 535b2f1ab58SBarry Smith retval.values[i][r_nnz-1] = 1.0 / diag[i]; 536b2f1ab58SBarry Smith for (j=0; j<r_nnz-1; j++) 537b2f1ab58SBarry Smith { 538b2f1ab58SBarry Smith retval.values[i][j] *= -1; 539b2f1ab58SBarry Smith } 540b2f1ab58SBarry Smith } 541b2f1ab58SBarry Smith ierr=PetscFree(diag); CHKERRQ(ierr); 542b2f1ab58SBarry Smith 543b2f1ab58SBarry Smith 544b2f1ab58SBarry Smith *matrix_L = retval; 545b2f1ab58SBarry Smith 546b2f1ab58SBarry Smith PetscFunctionReturn(0); 547b2f1ab58SBarry Smith } 548b2f1ab58SBarry Smith 549