1b2f1ab58SBarry Smith 2b2f1ab58SBarry Smith /* 3b2f1ab58SBarry Smith spbas_cholesky_row_alloc: 4b2f1ab58SBarry Smith in the data arrays, find a place where another row may be stored. 5b2f1ab58SBarry Smith Return PETSC_ERR_MEM if there is insufficient space to store the 6b2f1ab58SBarry Smith row, so garbage collection and/or re-allocation may be done. 7b2f1ab58SBarry Smith */ 8b2f1ab58SBarry Smith #undef __FUNCT__ 9b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_row_alloc" 10b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11b2f1ab58SBarry Smith { 12b2f1ab58SBarry Smith PetscFunctionBegin; 13b2f1ab58SBarry Smith retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14b2f1ab58SBarry Smith retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15b2f1ab58SBarry Smith *n_alloc_used += r_nnz; 16b2f1ab58SBarry Smith if (*n_alloc_used > retval.n_alloc_icol) 17b2f1ab58SBarry Smith { 18b2f1ab58SBarry Smith PetscFunctionReturn(PETSC_ERR_MEM); 19b2f1ab58SBarry Smith } 20b2f1ab58SBarry Smith else 21b2f1ab58SBarry Smith { 22b2f1ab58SBarry Smith PetscFunctionReturn(0); 23b2f1ab58SBarry Smith } 24b2f1ab58SBarry Smith } 25b2f1ab58SBarry Smith 26b2f1ab58SBarry Smith 27b2f1ab58SBarry Smith /* 28b2f1ab58SBarry Smith spbas_cholesky_garbage_collect: 29b2f1ab58SBarry Smith move the rows which have been calculated so far, as well as 30b2f1ab58SBarry Smith those currently under construction, to the front of the 31b2f1ab58SBarry Smith array, while putting them in the proper order. 32b2f1ab58SBarry Smith When it seems necessary, enlarge the current arrays. 33b2f1ab58SBarry Smith 34b2f1ab58SBarry Smith NB: re-allocation is being simulated using 35b2f1ab58SBarry Smith PetscMalloc, memcpy, PetscFree, because 36b2f1ab58SBarry Smith PetscRealloc does not seem to exist. 37b2f1ab58SBarry Smith 38b2f1ab58SBarry Smith */ 39b2f1ab58SBarry Smith #undef __FUNCT__ 40b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_garbage_collect" 41b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_garbage_collect( 42b2f1ab58SBarry Smith spbas_matrix * result, // I/O: the Cholesky factor matrix being constructed. 43b2f1ab58SBarry Smith // Only the storage, not the contents of this matrix 44b2f1ab58SBarry Smith // is changed in this function 45b2f1ab58SBarry Smith PetscInt i_row, // I : Number of rows for which the final contents are 46b2f1ab58SBarry Smith // known 47b2f1ab58SBarry Smith PetscInt *n_row_alloc_ok, // I/O: Number of rows which are already in their final 48b2f1ab58SBarry Smith // places in the arrays: they need not be moved any more 49b2f1ab58SBarry Smith PetscInt *n_alloc_used, // I/O: 50b2f1ab58SBarry Smith PetscInt *max_row_nnz // I : Over-estimate of the number of nonzeros needed to 51b2f1ab58SBarry Smith // store each row 52b2f1ab58SBarry Smith ) 53b2f1ab58SBarry Smith { 54b2f1ab58SBarry Smith 55b2f1ab58SBarry Smith #undef garbage_debug 56b2f1ab58SBarry Smith 57b2f1ab58SBarry Smith // PSEUDO-CODE: 58b2f1ab58SBarry Smith // 1. Choose the appropriate size for the arrays 59b2f1ab58SBarry Smith // 2. Rescue the arrays which would be lost during garbage collection 60b2f1ab58SBarry Smith // 3. Reallocate and correct administration 61b2f1ab58SBarry Smith // 4. Move all arrays so that they are in proper order 62b2f1ab58SBarry Smith 63b2f1ab58SBarry Smith PetscInt i,j; 64b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 65b2f1ab58SBarry Smith PetscInt n_alloc_ok=0; 66b2f1ab58SBarry Smith PetscInt n_alloc_ok_max=0; 67b2f1ab58SBarry Smith 68b2f1ab58SBarry Smith PetscErrorCode ierr; 69b2f1ab58SBarry Smith PetscInt need_already= 0; 70b2f1ab58SBarry Smith PetscInt n_rows_ahead=0; 71b2f1ab58SBarry Smith PetscInt max_need_extra= 0; 72b2f1ab58SBarry Smith PetscInt n_alloc_max, n_alloc_est, n_alloc; 73b2f1ab58SBarry Smith PetscInt n_alloc_now = result->n_alloc_icol; 74b2f1ab58SBarry Smith PetscInt *alloc_icol_old = result->alloc_icol; 75b2f1ab58SBarry Smith PetscScalar *alloc_val_old = result->alloc_val; 76b2f1ab58SBarry Smith PetscInt *icol_rescue; 77b2f1ab58SBarry Smith PetscScalar *val_rescue; 78b2f1ab58SBarry Smith PetscInt n_rescue; 79b2f1ab58SBarry Smith PetscInt n_row_rescue; 80b2f1ab58SBarry Smith PetscInt i_here, i_last, n_copy; 81d36aa34eSBarry Smith const PetscReal xtra_perc = 20; 82b2f1ab58SBarry Smith 83b2f1ab58SBarry Smith PetscFunctionBegin; 84b2f1ab58SBarry Smith 85b2f1ab58SBarry Smith //********************************************************* 86b2f1ab58SBarry Smith // 1. Choose appropriate array size 87b2f1ab58SBarry Smith // Count number of rows and memory usage which is already final 88b2f1ab58SBarry Smith for (i=0;i<i_row; i++) 89b2f1ab58SBarry Smith { 90b2f1ab58SBarry Smith n_alloc_ok += result->row_nnz[i]; 91b2f1ab58SBarry Smith n_alloc_ok_max += max_row_nnz[i]; 92b2f1ab58SBarry Smith } 93b2f1ab58SBarry Smith 94b2f1ab58SBarry Smith // Count currently needed memory usage and future memory requirements 95b2f1ab58SBarry Smith // (max, predicted) 96b2f1ab58SBarry Smith for (i=i_row; i<nrows; i++) 97b2f1ab58SBarry Smith { 98b2f1ab58SBarry Smith if (result->row_nnz[i]==0) 99b2f1ab58SBarry Smith { 100b2f1ab58SBarry Smith max_need_extra += max_row_nnz[i]; 101b2f1ab58SBarry Smith } 102b2f1ab58SBarry Smith else 103b2f1ab58SBarry Smith { 104b2f1ab58SBarry Smith need_already += max_row_nnz[i]; 105b2f1ab58SBarry Smith n_rows_ahead++; 106b2f1ab58SBarry Smith } 107b2f1ab58SBarry Smith } 108b2f1ab58SBarry Smith 109b2f1ab58SBarry Smith // Make maximal and realistic memory requirement estimates 110b2f1ab58SBarry Smith n_alloc_max = n_alloc_ok + need_already + max_need_extra; 111d36aa34eSBarry Smith n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * 112d36aa34eSBarry Smith ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 113b2f1ab58SBarry Smith 114b2f1ab58SBarry Smith // Choose array sizes 115b2f1ab58SBarry Smith if (n_alloc_max == n_alloc_est) 116b2f1ab58SBarry Smith { n_alloc = n_alloc_max; } 117b2f1ab58SBarry Smith else if (n_alloc_now >= n_alloc_est) 118b2f1ab58SBarry Smith { n_alloc = n_alloc_now; } 119b2f1ab58SBarry Smith else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) 120b2f1ab58SBarry Smith { n_alloc = n_alloc_max; } 121b2f1ab58SBarry Smith else 122b2f1ab58SBarry Smith { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); } 123b2f1ab58SBarry Smith 124b2f1ab58SBarry Smith // If new estimate is less than what we already have, 125b2f1ab58SBarry Smith // don't reallocate, just garbage-collect 126b2f1ab58SBarry Smith if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) 127b2f1ab58SBarry Smith { 128b2f1ab58SBarry Smith n_alloc = result->n_alloc_icol; 129b2f1ab58SBarry Smith } 130b2f1ab58SBarry Smith 131b2f1ab58SBarry Smith #ifdef garbage_debug 132b2f1ab58SBarry Smith // Print allocation considerations 133b2f1ab58SBarry Smith printf(" + Final version of rows 1..%d is available\n",i_row); 134b2f1ab58SBarry Smith printf(" They have %d nonzeros: %6.2f per row\n",n_alloc_ok, 135b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok / (PetscScalar)i_row); 136b2f1ab58SBarry Smith printf(" Without droptol, I would have had %d nonzeros\n", 137b2f1ab58SBarry Smith n_alloc_ok_max); 138b2f1ab58SBarry Smith printf(" So I use %6.2f%% of the allowed sparseness\n", 139b2f1ab58SBarry Smith 100.0 * (PetscScalar) n_alloc_ok / 140b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok_max); 141b2f1ab58SBarry Smith printf(" + I have %d rows for which I need full sparseness pattern\n", 142b2f1ab58SBarry Smith n_rows_ahead); 143b2f1ab58SBarry Smith printf(" They require %d nonzeros\n", 144b2f1ab58SBarry Smith need_already); 145b2f1ab58SBarry Smith printf(" + After that, I shall need %d more rows\n", 146b2f1ab58SBarry Smith nrows-n_rows_ahead-i_row); 147b2f1ab58SBarry Smith printf(" Maximally, I will need %d nonzeros there\n", 148b2f1ab58SBarry Smith max_need_extra); 149b2f1ab58SBarry Smith printf(" Realistically, I expect to need %6.2f nonzeros there\n", 150b2f1ab58SBarry Smith (PetscScalar) max_need_extra * 151b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok / 152b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok_max); 153b2f1ab58SBarry Smith printf(" + New allocated arrays should be %d (max) or %d (realistic) long\n", 154b2f1ab58SBarry Smith n_alloc_max, n_alloc_est); 155b2f1ab58SBarry Smith 156b2f1ab58SBarry Smith #endif 157b2f1ab58SBarry Smith // Motivate dimension choice 158b2f1ab58SBarry Smith printf(" Allocating %d nonzeros: ",n_alloc); 159b2f1ab58SBarry Smith if (n_alloc_max == n_alloc_est) 160b2f1ab58SBarry Smith { printf("this is the correct size\n"); } 161b2f1ab58SBarry Smith else if (n_alloc_now >= n_alloc_est) 162b2f1ab58SBarry Smith { printf("the current size, which seems enough\n"); } 163b2f1ab58SBarry Smith else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) 164b2f1ab58SBarry Smith { printf("the maximum estimate\n"); } 165b2f1ab58SBarry Smith else 166b2f1ab58SBarry Smith { printf("%6.2f %% more than the estimate\n",xtra_perc); } 167b2f1ab58SBarry Smith 168b2f1ab58SBarry Smith 169b2f1ab58SBarry Smith //********************************************************* 170b2f1ab58SBarry Smith // 2. Rescue arrays which would be lost 171b2f1ab58SBarry Smith // Count how many rows and nonzeros will have to be rescued 172b2f1ab58SBarry Smith // when moving all arrays in place 173b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 174b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 175b2f1ab58SBarry Smith else 176b2f1ab58SBarry Smith { 177b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 178b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 179b2f1ab58SBarry Smith result->row_nnz[i]; 180b2f1ab58SBarry Smith } 181b2f1ab58SBarry Smith 182b2f1ab58SBarry Smith #ifdef garbage_debug 183b2f1ab58SBarry Smith printf("\n\n"); 184b2f1ab58SBarry Smith printf("Rows 0..%d are already OK\n", *n_row_alloc_ok); 185b2f1ab58SBarry Smith printf("Rows 0..%d have the right dimensions\n", i_row-1); 186b2f1ab58SBarry Smith #endif 187b2f1ab58SBarry Smith 188b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 189b2f1ab58SBarry Smith { 190b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 191b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 192b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 193b2f1ab58SBarry Smith { 194b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 195b2f1ab58SBarry Smith { 196b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 197b2f1ab58SBarry Smith n_row_rescue++; 198b2f1ab58SBarry Smith //printf(" Rescue row %d: %d nonzeros\n",i,result->row_nnz[i]); 199b2f1ab58SBarry Smith //printf(" (New start at %d, old start at %d)\n", 200b2f1ab58SBarry Smith // *n_alloc_used, i_here); 201b2f1ab58SBarry Smith } 202b2f1ab58SBarry Smith 203b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 204b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 205b2f1ab58SBarry Smith } 206b2f1ab58SBarry Smith } 207b2f1ab58SBarry Smith 208b2f1ab58SBarry Smith #ifdef garbage_debug 209b2f1ab58SBarry Smith printf(" + %d arrays will have to be rescued: %d nnz\n", 210b2f1ab58SBarry Smith n_row_rescue, n_rescue); 211b2f1ab58SBarry Smith #endif 212b2f1ab58SBarry Smith 213b2f1ab58SBarry Smith // Allocate rescue arrays 214b2f1ab58SBarry Smith ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue); 215b2f1ab58SBarry Smith CHKERRQ(ierr); 216b2f1ab58SBarry Smith ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue); 217b2f1ab58SBarry Smith CHKERRQ(ierr); 218b2f1ab58SBarry Smith 219b2f1ab58SBarry Smith // Rescue the arrays which need rescuing 220b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 221b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 222b2f1ab58SBarry Smith else 223b2f1ab58SBarry Smith { 224b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 225b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 226b2f1ab58SBarry Smith result->row_nnz[i]; 227b2f1ab58SBarry Smith } 228b2f1ab58SBarry Smith 229b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 230b2f1ab58SBarry Smith { 231b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 232b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 233b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 234b2f1ab58SBarry Smith { 235b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 236b2f1ab58SBarry Smith { 237b2f1ab58SBarry Smith //printf(" Rescuing row %d to array[%d..]\n",i,n_rescue); 238b2f1ab58SBarry Smith memcpy((void*) &icol_rescue[n_rescue], 239b2f1ab58SBarry Smith (void*) result->icols[i], 240b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(int)); 241b2f1ab58SBarry Smith memcpy((void*) &val_rescue[n_rescue], 242b2f1ab58SBarry Smith (void*) result->values[i], 243b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(PetscScalar)); 244b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 245b2f1ab58SBarry Smith n_row_rescue++; 246b2f1ab58SBarry Smith } 247b2f1ab58SBarry Smith 248b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 249b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 250b2f1ab58SBarry Smith } 251b2f1ab58SBarry Smith } 252b2f1ab58SBarry Smith 253b2f1ab58SBarry Smith //********************************************************* 254b2f1ab58SBarry Smith // 3. Reallocate and correct administration 255b2f1ab58SBarry Smith 256b2f1ab58SBarry Smith if (n_alloc != result->n_alloc_icol) 257b2f1ab58SBarry Smith { 258b2f1ab58SBarry Smith n_copy = MIN(n_alloc,result->n_alloc_icol); 259b2f1ab58SBarry Smith 260b2f1ab58SBarry Smith // PETSC knows no REALLOC, so we'll REALLOC ourselves. 261b2f1ab58SBarry Smith 262b2f1ab58SBarry Smith // Allocate new icol-data, copy old contents 263b2f1ab58SBarry Smith ierr = PetscMalloc( n_alloc * sizeof(int), 264b2f1ab58SBarry Smith &result->alloc_icol); CHKERRQ(ierr); 265b2f1ab58SBarry Smith memcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int)); 266b2f1ab58SBarry Smith 267b2f1ab58SBarry Smith // Update administration, Reset pointers to new arrays 268b2f1ab58SBarry Smith result->n_alloc_icol = n_alloc; 269b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 270b2f1ab58SBarry Smith { 271b2f1ab58SBarry Smith result->icols[i] = result->alloc_icol + 272b2f1ab58SBarry Smith (result->icols[i] - alloc_icol_old); 273b2f1ab58SBarry Smith result->values[i] = result->alloc_val + 274b2f1ab58SBarry Smith (result->icols[i] - result->alloc_icol); 275b2f1ab58SBarry Smith } 276b2f1ab58SBarry Smith 277b2f1ab58SBarry Smith // Delete old array 278b2f1ab58SBarry Smith ierr = PetscFree(alloc_icol_old); CHKERRQ(ierr); 279b2f1ab58SBarry Smith 280b2f1ab58SBarry Smith // Allocate new value-data, copy old contents 281b2f1ab58SBarry Smith ierr = PetscMalloc( n_alloc * sizeof(PetscScalar), 282b2f1ab58SBarry Smith &result->alloc_val); CHKERRQ(ierr); 283b2f1ab58SBarry Smith memcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar)); 284b2f1ab58SBarry Smith 285b2f1ab58SBarry Smith // Update administration, Reset pointers to new arrays 286b2f1ab58SBarry Smith result->n_alloc_val = n_alloc; 287b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 288b2f1ab58SBarry Smith { 289b2f1ab58SBarry Smith result->values[i] = result->alloc_val + 290b2f1ab58SBarry Smith (result->icols[i] - result->alloc_icol); 291b2f1ab58SBarry Smith } 292b2f1ab58SBarry Smith 293b2f1ab58SBarry Smith // Delete old array 294b2f1ab58SBarry Smith ierr = PetscFree(alloc_val_old); CHKERRQ(ierr); 295b2f1ab58SBarry Smith } 296b2f1ab58SBarry Smith 297b2f1ab58SBarry Smith 298b2f1ab58SBarry Smith //********************************************************* 299b2f1ab58SBarry Smith // 4. Copy all the arrays to their proper places 300b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 301b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 302b2f1ab58SBarry Smith else 303b2f1ab58SBarry Smith { 304b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 305b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 306b2f1ab58SBarry Smith result->row_nnz[i]; 307b2f1ab58SBarry Smith } 308b2f1ab58SBarry Smith 309b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 310b2f1ab58SBarry Smith { 311b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 312b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 313b2f1ab58SBarry Smith 314b2f1ab58SBarry Smith result->icols[i] = result->alloc_icol + *n_alloc_used; 315b2f1ab58SBarry Smith result->values[i]= result->alloc_val + *n_alloc_used; 316b2f1ab58SBarry Smith 317b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 318b2f1ab58SBarry Smith { 319b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 320b2f1ab58SBarry Smith { 321b2f1ab58SBarry Smith //printf(" Copying rescued array[%d..] to row %d\n", 322b2f1ab58SBarry Smith // n_rescue,i); 323b2f1ab58SBarry Smith //fflush(stdout); 324b2f1ab58SBarry Smith memcpy((void*) result->icols[i], 325b2f1ab58SBarry Smith (void*) &icol_rescue[n_rescue], 326b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(int)); 327b2f1ab58SBarry Smith memcpy((void*) result->values[i], 328b2f1ab58SBarry Smith (void*) &val_rescue[n_rescue], 329b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(PetscScalar)); 330b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 331b2f1ab58SBarry Smith n_row_rescue++; 332b2f1ab58SBarry Smith } 333b2f1ab58SBarry Smith else 334b2f1ab58SBarry Smith { 335b2f1ab58SBarry Smith for (j=0; j<result->row_nnz[i]; j++) 336b2f1ab58SBarry Smith { 337b2f1ab58SBarry Smith result->icols[i][j] = result->alloc_icol[i_here+j]; 338b2f1ab58SBarry Smith result->values[i][j] = result->alloc_val[ i_here+j]; 339b2f1ab58SBarry Smith } 340b2f1ab58SBarry Smith } 341b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 342b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 343b2f1ab58SBarry Smith } 344b2f1ab58SBarry Smith } 345b2f1ab58SBarry Smith 346b2f1ab58SBarry Smith // Delete the rescue arrays 347b2f1ab58SBarry Smith ierr = PetscFree(icol_rescue); CHKERRQ(ierr); 348b2f1ab58SBarry Smith ierr = PetscFree(val_rescue); CHKERRQ(ierr); 349b2f1ab58SBarry Smith *n_row_alloc_ok = i_row; 350b2f1ab58SBarry Smith 351b2f1ab58SBarry Smith PetscFunctionReturn(0); 352b2f1ab58SBarry Smith 353b2f1ab58SBarry Smith } 354b2f1ab58SBarry Smith 355b2f1ab58SBarry Smith /* 356b2f1ab58SBarry Smith spbas_incomplete_cholesky: 357b2f1ab58SBarry Smith incomplete Cholesky decomposition of a square, symmetric, 358b2f1ab58SBarry Smith positive definite matrix. 359b2f1ab58SBarry Smith 360b2f1ab58SBarry Smith In case negative diagonals are encountered, function returns 361b2f1ab58SBarry Smith NEGATIVE_DIAGONAL. When this happens, call this function again 362b2f1ab58SBarry Smith with a larger epsdiag_in, a less sparse pattern, and/or a smaller 363b2f1ab58SBarry Smith droptol 364b2f1ab58SBarry Smith */ 365b2f1ab58SBarry Smith #undef __FUNCT__ 366b2f1ab58SBarry Smith #define __FUNCT__ "spbas_incomplete_cholesky" 367b2f1ab58SBarry Smith PetscErrorCode spbas_incomplete_cholesky( 368b2f1ab58SBarry Smith Mat A, const PetscInt *rip, const PetscInt *riip, 369b2f1ab58SBarry Smith spbas_matrix pattern, 370d36aa34eSBarry Smith PetscReal droptol, PetscReal epsdiag_in, 371b2f1ab58SBarry Smith spbas_matrix * matrix_L) 372b2f1ab58SBarry Smith { 373b2f1ab58SBarry Smith 374b2f1ab58SBarry Smith #undef cholesky_debug 375b2f1ab58SBarry Smith 376b2f1ab58SBarry Smith #ifdef cholesky_debug 377b2f1ab58SBarry Smith PetscInt idebug=0; 378b2f1ab58SBarry Smith const char * fmt= " (%d,%d) -> %12.8e\n"; 379b2f1ab58SBarry Smith PetscScalar tmp; 380b2f1ab58SBarry Smith #endif 381b2f1ab58SBarry Smith 382b2f1ab58SBarry Smith PetscInt jL; 383b2f1ab58SBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 384b2f1ab58SBarry Smith PetscInt *ai=a->i,*aj=a->j; 385b2f1ab58SBarry Smith MatScalar *aa=a->a; 386b2f1ab58SBarry Smith PetscInt nrows, ncols; 387b2f1ab58SBarry Smith PetscInt *max_row_nnz; 388b2f1ab58SBarry Smith PetscErrorCode ierr; 389b2f1ab58SBarry Smith spbas_matrix retval; 390b2f1ab58SBarry Smith PetscScalar * diag; 391b2f1ab58SBarry Smith PetscScalar * val; 392b2f1ab58SBarry Smith PetscScalar * lvec; 393b2f1ab58SBarry Smith PetscScalar epsdiag; 394b2f1ab58SBarry Smith PetscInt i,j,k; 395b2f1ab58SBarry Smith const PetscTruth do_values = PETSC_TRUE; 396b2f1ab58SBarry Smith PetscInt * r1_icol; PetscScalar *r1_val; 397b2f1ab58SBarry Smith PetscInt * r_icol; PetscInt r_nnz; PetscScalar *r_val; 398b2f1ab58SBarry Smith PetscInt * A_icol; PetscInt A_nnz; PetscScalar *A_val; 399b2f1ab58SBarry Smith PetscInt * p_icol; PetscInt p_nnz; 400b2f1ab58SBarry Smith PetscInt n_row_alloc_ok = 0; // number of rows which have been stored 401b2f1ab58SBarry Smith // correctly in the matrix 402b2f1ab58SBarry Smith PetscInt n_alloc_used = 0; // part of result->icols and result->values 403b2f1ab58SBarry Smith // which is currently being used 404b2f1ab58SBarry Smith PetscFunctionBegin; 405b2f1ab58SBarry Smith 406b2f1ab58SBarry Smith // Convert the Manteuffel shift from 'fraction of average diagonal' to 407b2f1ab58SBarry Smith // dimensioned value 408b2f1ab58SBarry Smith ierr = MatGetSize(A, &nrows, &ncols); CHKERRQ(ierr); 409b2f1ab58SBarry Smith ierr = MatGetTrace(A, &epsdiag); CHKERRQ(ierr); 410b2f1ab58SBarry Smith epsdiag *= epsdiag_in / nrows; 411*9767453cSBarry Smith ierr = PetscInfo2(PETSC_NULL," Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr); 412b2f1ab58SBarry Smith 413b2f1ab58SBarry Smith if (droptol<1e-10) {droptol=1e-10;} 414b2f1ab58SBarry Smith 415b2f1ab58SBarry Smith // Check dimensions 416b2f1ab58SBarry Smith if ( (nrows != pattern.nrows) || 417b2f1ab58SBarry Smith (ncols != pattern.ncols) || 418b2f1ab58SBarry Smith (ncols != nrows) ) 419b2f1ab58SBarry Smith { 420b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_ARG_INCOMP, 421b2f1ab58SBarry Smith "Dimension error in spbas_incomplete_cholesky\n"); 422b2f1ab58SBarry Smith } 423b2f1ab58SBarry Smith 424b2f1ab58SBarry Smith // Set overall values 425b2f1ab58SBarry Smith retval.nrows = nrows; 426b2f1ab58SBarry Smith retval.ncols = nrows; 427b2f1ab58SBarry Smith retval.nnz = pattern.nnz/10; 428b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 429b2f1ab58SBarry Smith retval.block_data = PETSC_TRUE; 430b2f1ab58SBarry Smith 431b2f1ab58SBarry Smith // Allocate sparseness pattern 432b2f1ab58SBarry Smith ierr = spbas_allocate_pattern(&retval, do_values); CHKERRQ(ierr); 433b2f1ab58SBarry Smith // Initial allocation of data 434b2f1ab58SBarry Smith memset((void*) retval.row_nnz, 0, nrows*sizeof(int)); 435b2f1ab58SBarry Smith ierr = spbas_allocate_data(&retval); CHKERRQ(ierr); 436b2f1ab58SBarry Smith retval.nnz = 0; 437b2f1ab58SBarry Smith 438b2f1ab58SBarry Smith // Allocate work arrays 439b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag); CHKERRQ(ierr); 440b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val); CHKERRQ(ierr); 441b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec); CHKERRQ(ierr); 442b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz); CHKERRQ(ierr); 443b2f1ab58SBarry Smith if (!(diag && val && lvec && max_row_nnz)) 444b2f1ab58SBarry Smith { 445b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_MEM, 446b2f1ab58SBarry Smith "Allocation error in spbas_incomplete_cholesky\n"); 447b2f1ab58SBarry Smith } 448b2f1ab58SBarry Smith 449b2f1ab58SBarry Smith memset((void*) val, 0, nrows*sizeof(PetscScalar)); 450b2f1ab58SBarry Smith memset((void*) lvec, 0, nrows*sizeof(PetscScalar)); 451b2f1ab58SBarry Smith memset((void*) max_row_nnz, 0, nrows*sizeof(int)); 452b2f1ab58SBarry Smith 453b2f1ab58SBarry Smith // Check correct format of sparseness pattern 454b2f1ab58SBarry Smith if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) 455b2f1ab58SBarry Smith { 456b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_SUP_SYS, 457b2f1ab58SBarry Smith "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 458b2f1ab58SBarry Smith } 459b2f1ab58SBarry Smith 460b2f1ab58SBarry Smith // Count the nonzeros on transpose of pattern 461b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 462b2f1ab58SBarry Smith { 463b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 464b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 465b2f1ab58SBarry Smith for (j=0; j<p_nnz; j++) 466b2f1ab58SBarry Smith { 467b2f1ab58SBarry Smith max_row_nnz[i+p_icol[j]]++; 468b2f1ab58SBarry Smith } 469b2f1ab58SBarry Smith } 470b2f1ab58SBarry Smith 471b2f1ab58SBarry Smith // Calculate rows of L 472b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 473b2f1ab58SBarry Smith { 474b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 475b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 476b2f1ab58SBarry Smith 477b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 478b2f1ab58SBarry Smith r_icol = retval.icols[i]; 479b2f1ab58SBarry Smith r_val = retval.values[i]; 480b2f1ab58SBarry Smith A_nnz = ai[rip[i]+1] - ai[rip[i]]; 481b2f1ab58SBarry Smith A_icol = &aj[ai[rip[i]]]; 482b2f1ab58SBarry Smith A_val = &aa[ai[rip[i]]]; 483b2f1ab58SBarry Smith 484b2f1ab58SBarry Smith // Calculate val += A(i,i:n)'; 485b2f1ab58SBarry Smith for (j=0; j<A_nnz; j++) 486b2f1ab58SBarry Smith { 487b2f1ab58SBarry Smith k = riip[A_icol[j]]; 488b2f1ab58SBarry Smith if (k>=i) { val[k] = A_val[j]; } 489b2f1ab58SBarry Smith } 490b2f1ab58SBarry Smith 491b2f1ab58SBarry Smith // Add regularization 492b2f1ab58SBarry Smith val[i] += epsdiag; 493b2f1ab58SBarry Smith 494b2f1ab58SBarry Smith // Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 495b2f1ab58SBarry Smith // val(i) = A(i,i) - L(0:i-1,i)' * lvec 496b2f1ab58SBarry Smith for ( j=0; j<r_nnz; j++) 497b2f1ab58SBarry Smith { 498b2f1ab58SBarry Smith k = r_icol[j]; 499b2f1ab58SBarry Smith lvec[k] = diag[k] * r_val[j]; 500b2f1ab58SBarry Smith val[i] -= r_val[j] * lvec[k]; 501b2f1ab58SBarry Smith } 502b2f1ab58SBarry Smith 503b2f1ab58SBarry Smith // Calculate the new diagonal 504b2f1ab58SBarry Smith diag[i] = val[i]; 505*9767453cSBarry Smith if (PetscRealPart(diag[i])<droptol) 506b2f1ab58SBarry Smith { 507b2f1ab58SBarry Smith printf("Error in spbas_incomplete_cholesky:\n"); 508b2f1ab58SBarry Smith printf("Negative diagonal in row %d\n",i+1); 509b2f1ab58SBarry Smith 510b2f1ab58SBarry Smith // Delete the whole matrix at once. 511b2f1ab58SBarry Smith ierr = spbas_delete(retval); 512b2f1ab58SBarry Smith return NEGATIVE_DIAGONAL; 513b2f1ab58SBarry Smith } 514b2f1ab58SBarry Smith 515b2f1ab58SBarry Smith #ifdef cholesky_debug 516b2f1ab58SBarry Smith if (idebug>=3) 517b2f1ab58SBarry Smith { 518b2f1ab58SBarry Smith printf("diagD * L (%d,:)=\n",i+1); 519b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) 520b2f1ab58SBarry Smith { 521b2f1ab58SBarry Smith printf(fmt,r_icol[j]+1,i+1,lvec[r_icol[j]]); 522b2f1ab58SBarry Smith } 523b2f1ab58SBarry Smith printf("\n\n"); 524b2f1ab58SBarry Smith } 525b2f1ab58SBarry Smith 526b2f1ab58SBarry Smith if (idebug>=1) { printf("Diag = %12.8e\n\n\n",diag[i]);} 527b2f1ab58SBarry Smith 528b2f1ab58SBarry Smith if (idebug>=2) 529b2f1ab58SBarry Smith { 530b2f1ab58SBarry Smith printf("L^T * diagD * L=\n"); 531b2f1ab58SBarry Smith tmp = 0.0; 532b2f1ab58SBarry Smith for ( j=0; j<r_nnz; j++) 533b2f1ab58SBarry Smith { 534b2f1ab58SBarry Smith k = r_icol[j]; 535b2f1ab58SBarry Smith tmp += r_val[j] * lvec[k]; 536b2f1ab58SBarry Smith } 537b2f1ab58SBarry Smith printf(fmt, i+1,i+1,tmp); 538b2f1ab58SBarry Smith } 539b2f1ab58SBarry Smith #endif 540b2f1ab58SBarry Smith 541b2f1ab58SBarry Smith // If necessary, allocate arrays 542b2f1ab58SBarry Smith if (r_nnz==0) 543b2f1ab58SBarry Smith { 544b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 545b2f1ab58SBarry Smith if (ierr == PETSC_ERR_MEM) 546b2f1ab58SBarry Smith { 547b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, 548b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 549b2f1ab58SBarry Smith CHKERRQ(ierr); 550b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 551b2f1ab58SBarry Smith } 552b2f1ab58SBarry Smith CHKERRQ(ierr); 553b2f1ab58SBarry Smith r_icol = retval.icols[i]; 554b2f1ab58SBarry Smith r_val = retval.values[i]; 555b2f1ab58SBarry Smith } 556b2f1ab58SBarry Smith 557b2f1ab58SBarry Smith // Now, fill in 558b2f1ab58SBarry Smith r_icol[r_nnz] = i; 559b2f1ab58SBarry Smith r_val [r_nnz] = 1.0; 560b2f1ab58SBarry Smith r_nnz++; 561b2f1ab58SBarry Smith retval.row_nnz[i]++; 562b2f1ab58SBarry Smith 563b2f1ab58SBarry Smith retval.nnz += r_nnz; 564b2f1ab58SBarry Smith 565b2f1ab58SBarry Smith // Calculate 566b2f1ab58SBarry Smith // val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) 567b2f1ab58SBarry Smith for (j=1; j<p_nnz; j++) 568b2f1ab58SBarry Smith { 569b2f1ab58SBarry Smith k = i+p_icol[j]; 570b2f1ab58SBarry Smith r1_icol = retval.icols[k]; 571b2f1ab58SBarry Smith r1_val = retval.values[k]; 572b2f1ab58SBarry Smith #ifdef cholesky_debug 573b2f1ab58SBarry Smith tmp = 0.0; 574b2f1ab58SBarry Smith #endif 575b2f1ab58SBarry Smith for (jL=0; jL<retval.row_nnz[k]; jL++) 576b2f1ab58SBarry Smith { 577b2f1ab58SBarry Smith val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 578b2f1ab58SBarry Smith #ifdef cholesky_debug 579b2f1ab58SBarry Smith tmp += r1_val[jL] * lvec[r1_icol[jL]]; 580b2f1ab58SBarry Smith #endif 581b2f1ab58SBarry Smith } 582b2f1ab58SBarry Smith #ifdef cholesky_debug 583b2f1ab58SBarry Smith if (idebug>=2 && tmp!=0) {printf(fmt, k+1,i+1,tmp);} 584b2f1ab58SBarry Smith #endif 585b2f1ab58SBarry Smith val[k] /= diag[i]; 586b2f1ab58SBarry Smith 587d36aa34eSBarry Smith if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) 588b2f1ab58SBarry Smith { 589b2f1ab58SBarry Smith // If necessary, allocate arrays 590b2f1ab58SBarry Smith if (retval.row_nnz[k]==0) 591b2f1ab58SBarry Smith { 592b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], 593b2f1ab58SBarry Smith &n_alloc_used); 594b2f1ab58SBarry Smith if (ierr == PETSC_ERR_MEM) 595b2f1ab58SBarry Smith { 596b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( 597b2f1ab58SBarry Smith &retval, i, &n_row_alloc_ok, 598b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 599b2f1ab58SBarry Smith CHKERRQ(ierr); 600b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], 601b2f1ab58SBarry Smith &n_alloc_used); 602b2f1ab58SBarry Smith r_icol = retval.icols[i]; 603b2f1ab58SBarry Smith r_val = retval.values[i]; 604b2f1ab58SBarry Smith } 605b2f1ab58SBarry Smith CHKERRQ(ierr); 606b2f1ab58SBarry Smith } 607b2f1ab58SBarry Smith 608b2f1ab58SBarry Smith // Now, fill in 609b2f1ab58SBarry Smith retval.icols[k][retval.row_nnz[k]] = i; 610b2f1ab58SBarry Smith retval.values[k][retval.row_nnz[k]] = val[k]; 611b2f1ab58SBarry Smith retval.row_nnz[k]++; 612b2f1ab58SBarry Smith } 613b2f1ab58SBarry Smith val[k] = 0; 614b2f1ab58SBarry Smith } 615b2f1ab58SBarry Smith #ifdef cholesky_debug 616b2f1ab58SBarry Smith if (idebug>=2) {printf("\n\n");} 617b2f1ab58SBarry Smith #endif 618b2f1ab58SBarry Smith 619b2f1ab58SBarry Smith // Erase the values used in the work arrays 620b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; } 621b2f1ab58SBarry Smith 622b2f1ab58SBarry Smith #ifdef cholesky_debug 623b2f1ab58SBarry Smith if (idebug>=2) 624b2f1ab58SBarry Smith { 625b2f1ab58SBarry Smith printf("new L=\n"); 626b2f1ab58SBarry Smith for (jL=i; jL<nrows; jL++) 627b2f1ab58SBarry Smith { 628b2f1ab58SBarry Smith j = retval.row_nnz[jL]-1; 629b2f1ab58SBarry Smith if (j>=0) 630b2f1ab58SBarry Smith { 631b2f1ab58SBarry Smith k = retval.icols[jL][j]; 632b2f1ab58SBarry Smith if (k==i) printf(fmt, jL+1,i+1,retval.values[jL][j]); 633b2f1ab58SBarry Smith } 634b2f1ab58SBarry Smith } 635b2f1ab58SBarry Smith printf("\n\n"); 636b2f1ab58SBarry Smith } 637b2f1ab58SBarry Smith #endif 638b2f1ab58SBarry Smith } 639b2f1ab58SBarry Smith 640b2f1ab58SBarry Smith ierr=PetscFree(lvec); CHKERRQ(ierr); ierr=PetscFree(val); CHKERRQ(ierr); 641b2f1ab58SBarry Smith 642b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, 643b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 644b2f1ab58SBarry Smith ierr=PetscFree(max_row_nnz); CHKERRQ(ierr); 645b2f1ab58SBarry Smith 646b2f1ab58SBarry Smith // Place the inverse of the diagonals in the matrix 647b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 648b2f1ab58SBarry Smith { 649b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 650b2f1ab58SBarry Smith retval.values[i][r_nnz-1] = 1.0 / diag[i]; 651b2f1ab58SBarry Smith for (j=0; j<r_nnz-1; j++) 652b2f1ab58SBarry Smith { 653b2f1ab58SBarry Smith retval.values[i][j] *= -1; 654b2f1ab58SBarry Smith } 655b2f1ab58SBarry Smith } 656b2f1ab58SBarry Smith ierr=PetscFree(diag); CHKERRQ(ierr); 657b2f1ab58SBarry Smith 658b2f1ab58SBarry Smith 659b2f1ab58SBarry Smith // Return successfully 660b2f1ab58SBarry Smith *matrix_L = retval; 661b2f1ab58SBarry Smith 662b2f1ab58SBarry Smith #ifdef cholesky_debug 663b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 664b2f1ab58SBarry Smith { 665b2f1ab58SBarry Smith printf("Row %d\n",i+1); 666b2f1ab58SBarry Smith for (j=0; j<retval.row_nnz[i]; j++) 667b2f1ab58SBarry Smith { 668b2f1ab58SBarry Smith printf(fmt, i+1,retval.icols[i][j]+1,retval.values[i][j]); 669b2f1ab58SBarry Smith } 670b2f1ab58SBarry Smith } 671b2f1ab58SBarry Smith #endif 672b2f1ab58SBarry Smith PetscFunctionReturn(0); 673b2f1ab58SBarry Smith } 674b2f1ab58SBarry Smith 675