1b2f1ab58SBarry Smith 2b2f1ab58SBarry Smith /* 3b2f1ab58SBarry Smith spbas_cholesky_row_alloc: 4b2f1ab58SBarry Smith in the data arrays, find a place where another row may be stored. 5b2f1ab58SBarry Smith Return PETSC_ERR_MEM if there is insufficient space to store the 6b2f1ab58SBarry Smith row, so garbage collection and/or re-allocation may be done. 7b2f1ab58SBarry Smith */ 8b2f1ab58SBarry Smith #undef __FUNCT__ 9b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_row_alloc" 10b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11b2f1ab58SBarry Smith { 12b2f1ab58SBarry Smith PetscFunctionBegin; 13b2f1ab58SBarry Smith retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14b2f1ab58SBarry Smith retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15b2f1ab58SBarry Smith *n_alloc_used += r_nnz; 16b2f1ab58SBarry Smith if (*n_alloc_used > retval.n_alloc_icol) 17b2f1ab58SBarry Smith { 18b2f1ab58SBarry Smith PetscFunctionReturn(PETSC_ERR_MEM); 19b2f1ab58SBarry Smith } 20b2f1ab58SBarry Smith else 21b2f1ab58SBarry Smith { 22b2f1ab58SBarry Smith PetscFunctionReturn(0); 23b2f1ab58SBarry Smith } 24b2f1ab58SBarry Smith } 25b2f1ab58SBarry Smith 26b2f1ab58SBarry Smith 27b2f1ab58SBarry Smith /* 28b2f1ab58SBarry Smith spbas_cholesky_garbage_collect: 29b2f1ab58SBarry Smith move the rows which have been calculated so far, as well as 30b2f1ab58SBarry Smith those currently under construction, to the front of the 31b2f1ab58SBarry Smith array, while putting them in the proper order. 32b2f1ab58SBarry Smith When it seems necessary, enlarge the current arrays. 33b2f1ab58SBarry Smith 34b2f1ab58SBarry Smith NB: re-allocation is being simulated using 35b2f1ab58SBarry Smith PetscMalloc, memcpy, PetscFree, because 36b2f1ab58SBarry Smith PetscRealloc does not seem to exist. 37b2f1ab58SBarry Smith 38b2f1ab58SBarry Smith */ 39b2f1ab58SBarry Smith #undef __FUNCT__ 40b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_garbage_collect" 41b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_garbage_collect( 42b2f1ab58SBarry Smith spbas_matrix * result, // I/O: the Cholesky factor matrix being constructed. 43b2f1ab58SBarry Smith // Only the storage, not the contents of this matrix 44b2f1ab58SBarry Smith // is changed in this function 45b2f1ab58SBarry Smith PetscInt i_row, // I : Number of rows for which the final contents are 46b2f1ab58SBarry Smith // known 47b2f1ab58SBarry Smith PetscInt *n_row_alloc_ok, // I/O: Number of rows which are already in their final 48b2f1ab58SBarry Smith // places in the arrays: they need not be moved any more 49b2f1ab58SBarry Smith PetscInt *n_alloc_used, // I/O: 50b2f1ab58SBarry Smith PetscInt *max_row_nnz // I : Over-estimate of the number of nonzeros needed to 51b2f1ab58SBarry Smith // store each row 52b2f1ab58SBarry Smith ) 53b2f1ab58SBarry Smith { 54b2f1ab58SBarry Smith 55b2f1ab58SBarry Smith #undef garbage_debug 56b2f1ab58SBarry Smith 57b2f1ab58SBarry Smith // PSEUDO-CODE: 58b2f1ab58SBarry Smith // 1. Choose the appropriate size for the arrays 59b2f1ab58SBarry Smith // 2. Rescue the arrays which would be lost during garbage collection 60b2f1ab58SBarry Smith // 3. Reallocate and correct administration 61b2f1ab58SBarry Smith // 4. Move all arrays so that they are in proper order 62b2f1ab58SBarry Smith 63b2f1ab58SBarry Smith PetscInt i,j; 64b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 65b2f1ab58SBarry Smith PetscInt n_alloc_ok=0; 66b2f1ab58SBarry Smith PetscInt n_alloc_ok_max=0; 67b2f1ab58SBarry Smith 68b2f1ab58SBarry Smith PetscErrorCode ierr; 69b2f1ab58SBarry Smith PetscInt need_already= 0; 70b2f1ab58SBarry Smith PetscInt n_rows_ahead=0; 71b2f1ab58SBarry Smith PetscInt max_need_extra= 0; 72b2f1ab58SBarry Smith PetscInt n_alloc_max, n_alloc_est, n_alloc; 73b2f1ab58SBarry Smith PetscInt n_alloc_now = result->n_alloc_icol; 74b2f1ab58SBarry Smith PetscInt *alloc_icol_old = result->alloc_icol; 75b2f1ab58SBarry Smith PetscScalar *alloc_val_old = result->alloc_val; 76b2f1ab58SBarry Smith PetscInt *icol_rescue; 77b2f1ab58SBarry Smith PetscScalar *val_rescue; 78b2f1ab58SBarry Smith PetscInt n_rescue; 79b2f1ab58SBarry Smith PetscInt n_row_rescue; 80b2f1ab58SBarry Smith PetscInt i_here, i_last, n_copy; 81*d36aa34eSBarry Smith const PetscReal xtra_perc = 20; 82b2f1ab58SBarry Smith 83b2f1ab58SBarry Smith PetscFunctionBegin; 84b2f1ab58SBarry Smith 85b2f1ab58SBarry Smith //********************************************************* 86b2f1ab58SBarry Smith // 1. Choose appropriate array size 87b2f1ab58SBarry Smith // Count number of rows and memory usage which is already final 88b2f1ab58SBarry Smith for (i=0;i<i_row; i++) 89b2f1ab58SBarry Smith { 90b2f1ab58SBarry Smith n_alloc_ok += result->row_nnz[i]; 91b2f1ab58SBarry Smith n_alloc_ok_max += max_row_nnz[i]; 92b2f1ab58SBarry Smith } 93b2f1ab58SBarry Smith 94b2f1ab58SBarry Smith // Count currently needed memory usage and future memory requirements 95b2f1ab58SBarry Smith // (max, predicted) 96b2f1ab58SBarry Smith for (i=i_row; i<nrows; i++) 97b2f1ab58SBarry Smith { 98b2f1ab58SBarry Smith if (result->row_nnz[i]==0) 99b2f1ab58SBarry Smith { 100b2f1ab58SBarry Smith max_need_extra += max_row_nnz[i]; 101b2f1ab58SBarry Smith } 102b2f1ab58SBarry Smith else 103b2f1ab58SBarry Smith { 104b2f1ab58SBarry Smith need_already += max_row_nnz[i]; 105b2f1ab58SBarry Smith n_rows_ahead++; 106b2f1ab58SBarry Smith } 107b2f1ab58SBarry Smith } 108b2f1ab58SBarry Smith 109b2f1ab58SBarry Smith // Make maximal and realistic memory requirement estimates 110b2f1ab58SBarry Smith n_alloc_max = n_alloc_ok + need_already + max_need_extra; 111*d36aa34eSBarry Smith n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * 112*d36aa34eSBarry Smith ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 113b2f1ab58SBarry Smith 114b2f1ab58SBarry Smith // Choose array sizes 115b2f1ab58SBarry Smith if (n_alloc_max == n_alloc_est) 116b2f1ab58SBarry Smith { n_alloc = n_alloc_max; } 117b2f1ab58SBarry Smith else if (n_alloc_now >= n_alloc_est) 118b2f1ab58SBarry Smith { n_alloc = n_alloc_now; } 119b2f1ab58SBarry Smith else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) 120b2f1ab58SBarry Smith { n_alloc = n_alloc_max; } 121b2f1ab58SBarry Smith else 122b2f1ab58SBarry Smith { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); } 123b2f1ab58SBarry Smith 124b2f1ab58SBarry Smith // If new estimate is less than what we already have, 125b2f1ab58SBarry Smith // don't reallocate, just garbage-collect 126b2f1ab58SBarry Smith if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) 127b2f1ab58SBarry Smith { 128b2f1ab58SBarry Smith n_alloc = result->n_alloc_icol; 129b2f1ab58SBarry Smith } 130b2f1ab58SBarry Smith 131b2f1ab58SBarry Smith #ifdef garbage_debug 132b2f1ab58SBarry Smith // Print allocation considerations 133b2f1ab58SBarry Smith printf(" + Final version of rows 1..%d is available\n",i_row); 134b2f1ab58SBarry Smith printf(" They have %d nonzeros: %6.2f per row\n",n_alloc_ok, 135b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok / (PetscScalar)i_row); 136b2f1ab58SBarry Smith printf(" Without droptol, I would have had %d nonzeros\n", 137b2f1ab58SBarry Smith n_alloc_ok_max); 138b2f1ab58SBarry Smith printf(" So I use %6.2f%% of the allowed sparseness\n", 139b2f1ab58SBarry Smith 100.0 * (PetscScalar) n_alloc_ok / 140b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok_max); 141b2f1ab58SBarry Smith printf(" + I have %d rows for which I need full sparseness pattern\n", 142b2f1ab58SBarry Smith n_rows_ahead); 143b2f1ab58SBarry Smith printf(" They require %d nonzeros\n", 144b2f1ab58SBarry Smith need_already); 145b2f1ab58SBarry Smith printf(" + After that, I shall need %d more rows\n", 146b2f1ab58SBarry Smith nrows-n_rows_ahead-i_row); 147b2f1ab58SBarry Smith printf(" Maximally, I will need %d nonzeros there\n", 148b2f1ab58SBarry Smith max_need_extra); 149b2f1ab58SBarry Smith printf(" Realistically, I expect to need %6.2f nonzeros there\n", 150b2f1ab58SBarry Smith (PetscScalar) max_need_extra * 151b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok / 152b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok_max); 153b2f1ab58SBarry Smith printf(" + New allocated arrays should be %d (max) or %d (realistic) long\n", 154b2f1ab58SBarry Smith n_alloc_max, n_alloc_est); 155b2f1ab58SBarry Smith 156b2f1ab58SBarry Smith #endif 157b2f1ab58SBarry Smith // Motivate dimension choice 158b2f1ab58SBarry Smith printf(" Allocating %d nonzeros: ",n_alloc); 159b2f1ab58SBarry Smith if (n_alloc_max == n_alloc_est) 160b2f1ab58SBarry Smith { printf("this is the correct size\n"); } 161b2f1ab58SBarry Smith else if (n_alloc_now >= n_alloc_est) 162b2f1ab58SBarry Smith { printf("the current size, which seems enough\n"); } 163b2f1ab58SBarry Smith else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) 164b2f1ab58SBarry Smith { printf("the maximum estimate\n"); } 165b2f1ab58SBarry Smith else 166b2f1ab58SBarry Smith { printf("%6.2f %% more than the estimate\n",xtra_perc); } 167b2f1ab58SBarry Smith 168b2f1ab58SBarry Smith 169b2f1ab58SBarry Smith //********************************************************* 170b2f1ab58SBarry Smith // 2. Rescue arrays which would be lost 171b2f1ab58SBarry Smith // Count how many rows and nonzeros will have to be rescued 172b2f1ab58SBarry Smith // when moving all arrays in place 173b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 174b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 175b2f1ab58SBarry Smith else 176b2f1ab58SBarry Smith { 177b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 178b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 179b2f1ab58SBarry Smith result->row_nnz[i]; 180b2f1ab58SBarry Smith } 181b2f1ab58SBarry Smith 182b2f1ab58SBarry Smith #ifdef garbage_debug 183b2f1ab58SBarry Smith printf("\n\n"); 184b2f1ab58SBarry Smith printf("Rows 0..%d are already OK\n", *n_row_alloc_ok); 185b2f1ab58SBarry Smith printf("Rows 0..%d have the right dimensions\n", i_row-1); 186b2f1ab58SBarry Smith #endif 187b2f1ab58SBarry Smith 188b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 189b2f1ab58SBarry Smith { 190b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 191b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 192b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 193b2f1ab58SBarry Smith { 194b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 195b2f1ab58SBarry Smith { 196b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 197b2f1ab58SBarry Smith n_row_rescue++; 198b2f1ab58SBarry Smith //printf(" Rescue row %d: %d nonzeros\n",i,result->row_nnz[i]); 199b2f1ab58SBarry Smith //printf(" (New start at %d, old start at %d)\n", 200b2f1ab58SBarry Smith // *n_alloc_used, i_here); 201b2f1ab58SBarry Smith } 202b2f1ab58SBarry Smith 203b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 204b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 205b2f1ab58SBarry Smith } 206b2f1ab58SBarry Smith } 207b2f1ab58SBarry Smith 208b2f1ab58SBarry Smith #ifdef garbage_debug 209b2f1ab58SBarry Smith printf(" + %d arrays will have to be rescued: %d nnz\n", 210b2f1ab58SBarry Smith n_row_rescue, n_rescue); 211b2f1ab58SBarry Smith #endif 212b2f1ab58SBarry Smith 213b2f1ab58SBarry Smith // Allocate rescue arrays 214b2f1ab58SBarry Smith ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue); 215b2f1ab58SBarry Smith CHKERRQ(ierr); 216b2f1ab58SBarry Smith ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue); 217b2f1ab58SBarry Smith CHKERRQ(ierr); 218b2f1ab58SBarry Smith 219b2f1ab58SBarry Smith // Rescue the arrays which need rescuing 220b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 221b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 222b2f1ab58SBarry Smith else 223b2f1ab58SBarry Smith { 224b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 225b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 226b2f1ab58SBarry Smith result->row_nnz[i]; 227b2f1ab58SBarry Smith } 228b2f1ab58SBarry Smith 229b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 230b2f1ab58SBarry Smith { 231b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 232b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 233b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 234b2f1ab58SBarry Smith { 235b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 236b2f1ab58SBarry Smith { 237b2f1ab58SBarry Smith //printf(" Rescuing row %d to array[%d..]\n",i,n_rescue); 238b2f1ab58SBarry Smith memcpy((void*) &icol_rescue[n_rescue], 239b2f1ab58SBarry Smith (void*) result->icols[i], 240b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(int)); 241b2f1ab58SBarry Smith memcpy((void*) &val_rescue[n_rescue], 242b2f1ab58SBarry Smith (void*) result->values[i], 243b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(PetscScalar)); 244b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 245b2f1ab58SBarry Smith n_row_rescue++; 246b2f1ab58SBarry Smith } 247b2f1ab58SBarry Smith 248b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 249b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 250b2f1ab58SBarry Smith } 251b2f1ab58SBarry Smith } 252b2f1ab58SBarry Smith 253b2f1ab58SBarry Smith //********************************************************* 254b2f1ab58SBarry Smith // 3. Reallocate and correct administration 255b2f1ab58SBarry Smith 256b2f1ab58SBarry Smith if (n_alloc != result->n_alloc_icol) 257b2f1ab58SBarry Smith { 258b2f1ab58SBarry Smith n_copy = MIN(n_alloc,result->n_alloc_icol); 259b2f1ab58SBarry Smith 260b2f1ab58SBarry Smith // PETSC knows no REALLOC, so we'll REALLOC ourselves. 261b2f1ab58SBarry Smith 262b2f1ab58SBarry Smith // Allocate new icol-data, copy old contents 263b2f1ab58SBarry Smith ierr = PetscMalloc( n_alloc * sizeof(int), 264b2f1ab58SBarry Smith &result->alloc_icol); CHKERRQ(ierr); 265b2f1ab58SBarry Smith memcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int)); 266b2f1ab58SBarry Smith 267b2f1ab58SBarry Smith // Update administration, Reset pointers to new arrays 268b2f1ab58SBarry Smith result->n_alloc_icol = n_alloc; 269b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 270b2f1ab58SBarry Smith { 271b2f1ab58SBarry Smith result->icols[i] = result->alloc_icol + 272b2f1ab58SBarry Smith (result->icols[i] - alloc_icol_old); 273b2f1ab58SBarry Smith result->values[i] = result->alloc_val + 274b2f1ab58SBarry Smith (result->icols[i] - result->alloc_icol); 275b2f1ab58SBarry Smith } 276b2f1ab58SBarry Smith 277b2f1ab58SBarry Smith // Delete old array 278b2f1ab58SBarry Smith ierr = PetscFree(alloc_icol_old); CHKERRQ(ierr); 279b2f1ab58SBarry Smith 280b2f1ab58SBarry Smith // Allocate new value-data, copy old contents 281b2f1ab58SBarry Smith ierr = PetscMalloc( n_alloc * sizeof(PetscScalar), 282b2f1ab58SBarry Smith &result->alloc_val); CHKERRQ(ierr); 283b2f1ab58SBarry Smith memcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar)); 284b2f1ab58SBarry Smith 285b2f1ab58SBarry Smith // Update administration, Reset pointers to new arrays 286b2f1ab58SBarry Smith result->n_alloc_val = n_alloc; 287b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 288b2f1ab58SBarry Smith { 289b2f1ab58SBarry Smith result->values[i] = result->alloc_val + 290b2f1ab58SBarry Smith (result->icols[i] - result->alloc_icol); 291b2f1ab58SBarry Smith } 292b2f1ab58SBarry Smith 293b2f1ab58SBarry Smith // Delete old array 294b2f1ab58SBarry Smith ierr = PetscFree(alloc_val_old); CHKERRQ(ierr); 295b2f1ab58SBarry Smith } 296b2f1ab58SBarry Smith 297b2f1ab58SBarry Smith 298b2f1ab58SBarry Smith //********************************************************* 299b2f1ab58SBarry Smith // 4. Copy all the arrays to their proper places 300b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 301b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 302b2f1ab58SBarry Smith else 303b2f1ab58SBarry Smith { 304b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 305b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 306b2f1ab58SBarry Smith result->row_nnz[i]; 307b2f1ab58SBarry Smith } 308b2f1ab58SBarry Smith 309b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 310b2f1ab58SBarry Smith { 311b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 312b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 313b2f1ab58SBarry Smith 314b2f1ab58SBarry Smith result->icols[i] = result->alloc_icol + *n_alloc_used; 315b2f1ab58SBarry Smith result->values[i]= result->alloc_val + *n_alloc_used; 316b2f1ab58SBarry Smith 317b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 318b2f1ab58SBarry Smith { 319b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 320b2f1ab58SBarry Smith { 321b2f1ab58SBarry Smith //printf(" Copying rescued array[%d..] to row %d\n", 322b2f1ab58SBarry Smith // n_rescue,i); 323b2f1ab58SBarry Smith //fflush(stdout); 324b2f1ab58SBarry Smith memcpy((void*) result->icols[i], 325b2f1ab58SBarry Smith (void*) &icol_rescue[n_rescue], 326b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(int)); 327b2f1ab58SBarry Smith memcpy((void*) result->values[i], 328b2f1ab58SBarry Smith (void*) &val_rescue[n_rescue], 329b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(PetscScalar)); 330b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 331b2f1ab58SBarry Smith n_row_rescue++; 332b2f1ab58SBarry Smith } 333b2f1ab58SBarry Smith else 334b2f1ab58SBarry Smith { 335b2f1ab58SBarry Smith for (j=0; j<result->row_nnz[i]; j++) 336b2f1ab58SBarry Smith { 337b2f1ab58SBarry Smith result->icols[i][j] = result->alloc_icol[i_here+j]; 338b2f1ab58SBarry Smith result->values[i][j] = result->alloc_val[ i_here+j]; 339b2f1ab58SBarry Smith } 340b2f1ab58SBarry Smith } 341b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 342b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 343b2f1ab58SBarry Smith } 344b2f1ab58SBarry Smith } 345b2f1ab58SBarry Smith 346b2f1ab58SBarry Smith // Delete the rescue arrays 347b2f1ab58SBarry Smith ierr = PetscFree(icol_rescue); CHKERRQ(ierr); 348b2f1ab58SBarry Smith ierr = PetscFree(val_rescue); CHKERRQ(ierr); 349b2f1ab58SBarry Smith *n_row_alloc_ok = i_row; 350b2f1ab58SBarry Smith 351b2f1ab58SBarry Smith PetscFunctionReturn(0); 352b2f1ab58SBarry Smith 353b2f1ab58SBarry Smith } 354b2f1ab58SBarry Smith 355b2f1ab58SBarry Smith /* 356b2f1ab58SBarry Smith spbas_incomplete_cholesky: 357b2f1ab58SBarry Smith incomplete Cholesky decomposition of a square, symmetric, 358b2f1ab58SBarry Smith positive definite matrix. 359b2f1ab58SBarry Smith 360b2f1ab58SBarry Smith In case negative diagonals are encountered, function returns 361b2f1ab58SBarry Smith NEGATIVE_DIAGONAL. When this happens, call this function again 362b2f1ab58SBarry Smith with a larger epsdiag_in, a less sparse pattern, and/or a smaller 363b2f1ab58SBarry Smith droptol 364b2f1ab58SBarry Smith */ 365b2f1ab58SBarry Smith #undef __FUNCT__ 366b2f1ab58SBarry Smith #define __FUNCT__ "spbas_incomplete_cholesky" 367b2f1ab58SBarry Smith PetscErrorCode spbas_incomplete_cholesky( 368b2f1ab58SBarry Smith Mat A, const PetscInt *rip, const PetscInt *riip, 369b2f1ab58SBarry Smith spbas_matrix pattern, 370*d36aa34eSBarry Smith PetscReal droptol, PetscReal epsdiag_in, 371b2f1ab58SBarry Smith spbas_matrix * matrix_L) 372b2f1ab58SBarry Smith { 373b2f1ab58SBarry Smith 374b2f1ab58SBarry Smith #undef cholesky_debug 375b2f1ab58SBarry Smith 376b2f1ab58SBarry Smith #ifdef cholesky_debug 377b2f1ab58SBarry Smith PetscInt idebug=0; 378b2f1ab58SBarry Smith const char * fmt= " (%d,%d) -> %12.8e\n"; 379b2f1ab58SBarry Smith PetscScalar tmp; 380b2f1ab58SBarry Smith #endif 381b2f1ab58SBarry Smith 382b2f1ab58SBarry Smith PetscInt jL; 383b2f1ab58SBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 384b2f1ab58SBarry Smith PetscInt *ai=a->i,*aj=a->j; 385b2f1ab58SBarry Smith MatScalar *aa=a->a; 386b2f1ab58SBarry Smith PetscInt nrows, ncols; 387b2f1ab58SBarry Smith PetscInt *max_row_nnz; 388b2f1ab58SBarry Smith PetscErrorCode ierr; 389b2f1ab58SBarry Smith spbas_matrix retval; 390b2f1ab58SBarry Smith PetscScalar * diag; 391b2f1ab58SBarry Smith PetscScalar * val; 392b2f1ab58SBarry Smith PetscScalar * lvec; 393b2f1ab58SBarry Smith PetscScalar epsdiag; 394b2f1ab58SBarry Smith PetscInt i,j,k; 395b2f1ab58SBarry Smith const PetscTruth do_values = PETSC_TRUE; 396b2f1ab58SBarry Smith PetscInt * r1_icol; PetscScalar *r1_val; 397b2f1ab58SBarry Smith PetscInt * r_icol; PetscInt r_nnz; PetscScalar *r_val; 398b2f1ab58SBarry Smith PetscInt * A_icol; PetscInt A_nnz; PetscScalar *A_val; 399b2f1ab58SBarry Smith PetscInt * p_icol; PetscInt p_nnz; 400b2f1ab58SBarry Smith PetscInt n_row_alloc_ok = 0; // number of rows which have been stored 401b2f1ab58SBarry Smith // correctly in the matrix 402b2f1ab58SBarry Smith PetscInt n_alloc_used = 0; // part of result->icols and result->values 403b2f1ab58SBarry Smith // which is currently being used 404b2f1ab58SBarry Smith PetscFunctionBegin; 405b2f1ab58SBarry Smith 406b2f1ab58SBarry Smith // Convert the Manteuffel shift from 'fraction of average diagonal' to 407b2f1ab58SBarry Smith // dimensioned value 408b2f1ab58SBarry Smith ierr = MatGetSize(A, &nrows, &ncols); CHKERRQ(ierr); 409b2f1ab58SBarry Smith ierr = MatGetTrace(A, &epsdiag); CHKERRQ(ierr); 410b2f1ab58SBarry Smith epsdiag *= epsdiag_in / nrows; 411b2f1ab58SBarry Smith printf(" Dimensioned Manteuffel shift=%e\n", epsdiag); 412b2f1ab58SBarry Smith printf(" Drop tolerance =%e\n",droptol); 413b2f1ab58SBarry Smith 414b2f1ab58SBarry Smith if (droptol<1e-10) {droptol=1e-10;} 415b2f1ab58SBarry Smith 416b2f1ab58SBarry Smith // Check dimensions 417b2f1ab58SBarry Smith if ( (nrows != pattern.nrows) || 418b2f1ab58SBarry Smith (ncols != pattern.ncols) || 419b2f1ab58SBarry Smith (ncols != nrows) ) 420b2f1ab58SBarry Smith { 421b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_ARG_INCOMP, 422b2f1ab58SBarry Smith "Dimension error in spbas_incomplete_cholesky\n"); 423b2f1ab58SBarry Smith } 424b2f1ab58SBarry Smith 425b2f1ab58SBarry Smith // Set overall values 426b2f1ab58SBarry Smith retval.nrows = nrows; 427b2f1ab58SBarry Smith retval.ncols = nrows; 428b2f1ab58SBarry Smith retval.nnz = pattern.nnz/10; 429b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 430b2f1ab58SBarry Smith retval.block_data = PETSC_TRUE; 431b2f1ab58SBarry Smith 432b2f1ab58SBarry Smith // Allocate sparseness pattern 433b2f1ab58SBarry Smith ierr = spbas_allocate_pattern(&retval, do_values); CHKERRQ(ierr); 434b2f1ab58SBarry Smith // Initial allocation of data 435b2f1ab58SBarry Smith memset((void*) retval.row_nnz, 0, nrows*sizeof(int)); 436b2f1ab58SBarry Smith ierr = spbas_allocate_data(&retval); CHKERRQ(ierr); 437b2f1ab58SBarry Smith retval.nnz = 0; 438b2f1ab58SBarry Smith 439b2f1ab58SBarry Smith // Allocate work arrays 440b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag); CHKERRQ(ierr); 441b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val); CHKERRQ(ierr); 442b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec); CHKERRQ(ierr); 443b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz); CHKERRQ(ierr); 444b2f1ab58SBarry Smith if (!(diag && val && lvec && max_row_nnz)) 445b2f1ab58SBarry Smith { 446b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_MEM, 447b2f1ab58SBarry Smith "Allocation error in spbas_incomplete_cholesky\n"); 448b2f1ab58SBarry Smith } 449b2f1ab58SBarry Smith 450b2f1ab58SBarry Smith memset((void*) val, 0, nrows*sizeof(PetscScalar)); 451b2f1ab58SBarry Smith memset((void*) lvec, 0, nrows*sizeof(PetscScalar)); 452b2f1ab58SBarry Smith memset((void*) max_row_nnz, 0, nrows*sizeof(int)); 453b2f1ab58SBarry Smith 454b2f1ab58SBarry Smith // Check correct format of sparseness pattern 455b2f1ab58SBarry Smith if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) 456b2f1ab58SBarry Smith { 457b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_SUP_SYS, 458b2f1ab58SBarry Smith "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 459b2f1ab58SBarry Smith } 460b2f1ab58SBarry Smith 461b2f1ab58SBarry Smith // Count the nonzeros on transpose of pattern 462b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 463b2f1ab58SBarry Smith { 464b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 465b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 466b2f1ab58SBarry Smith for (j=0; j<p_nnz; j++) 467b2f1ab58SBarry Smith { 468b2f1ab58SBarry Smith max_row_nnz[i+p_icol[j]]++; 469b2f1ab58SBarry Smith } 470b2f1ab58SBarry Smith } 471b2f1ab58SBarry Smith 472b2f1ab58SBarry Smith // Calculate rows of L 473b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 474b2f1ab58SBarry Smith { 475b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 476b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 477b2f1ab58SBarry Smith 478b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 479b2f1ab58SBarry Smith r_icol = retval.icols[i]; 480b2f1ab58SBarry Smith r_val = retval.values[i]; 481b2f1ab58SBarry Smith A_nnz = ai[rip[i]+1] - ai[rip[i]]; 482b2f1ab58SBarry Smith A_icol = &aj[ai[rip[i]]]; 483b2f1ab58SBarry Smith A_val = &aa[ai[rip[i]]]; 484b2f1ab58SBarry Smith 485b2f1ab58SBarry Smith // Calculate val += A(i,i:n)'; 486b2f1ab58SBarry Smith for (j=0; j<A_nnz; j++) 487b2f1ab58SBarry Smith { 488b2f1ab58SBarry Smith k = riip[A_icol[j]]; 489b2f1ab58SBarry Smith if (k>=i) { val[k] = A_val[j]; } 490b2f1ab58SBarry Smith } 491b2f1ab58SBarry Smith 492b2f1ab58SBarry Smith // Add regularization 493b2f1ab58SBarry Smith val[i] += epsdiag; 494b2f1ab58SBarry Smith 495b2f1ab58SBarry Smith // Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 496b2f1ab58SBarry Smith // val(i) = A(i,i) - L(0:i-1,i)' * lvec 497b2f1ab58SBarry Smith for ( j=0; j<r_nnz; j++) 498b2f1ab58SBarry Smith { 499b2f1ab58SBarry Smith k = r_icol[j]; 500b2f1ab58SBarry Smith lvec[k] = diag[k] * r_val[j]; 501b2f1ab58SBarry Smith val[i] -= r_val[j] * lvec[k]; 502b2f1ab58SBarry Smith } 503b2f1ab58SBarry Smith 504b2f1ab58SBarry Smith // Calculate the new diagonal 505b2f1ab58SBarry Smith diag[i] = val[i]; 506*d36aa34eSBarry Smith if (PetscAbsScalar(diag[i])<droptol) 507b2f1ab58SBarry Smith { 508b2f1ab58SBarry Smith printf("Error in spbas_incomplete_cholesky:\n"); 509b2f1ab58SBarry Smith printf("Negative diagonal in row %d\n",i+1); 510b2f1ab58SBarry Smith 511b2f1ab58SBarry Smith // Delete the whole matrix at once. 512b2f1ab58SBarry Smith ierr = spbas_delete(retval); 513b2f1ab58SBarry Smith return NEGATIVE_DIAGONAL; 514b2f1ab58SBarry Smith } 515b2f1ab58SBarry Smith 516b2f1ab58SBarry Smith #ifdef cholesky_debug 517b2f1ab58SBarry Smith if (idebug>=3) 518b2f1ab58SBarry Smith { 519b2f1ab58SBarry Smith printf("diagD * L (%d,:)=\n",i+1); 520b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) 521b2f1ab58SBarry Smith { 522b2f1ab58SBarry Smith printf(fmt,r_icol[j]+1,i+1,lvec[r_icol[j]]); 523b2f1ab58SBarry Smith } 524b2f1ab58SBarry Smith printf("\n\n"); 525b2f1ab58SBarry Smith } 526b2f1ab58SBarry Smith 527b2f1ab58SBarry Smith if (idebug>=1) { printf("Diag = %12.8e\n\n\n",diag[i]);} 528b2f1ab58SBarry Smith 529b2f1ab58SBarry Smith if (idebug>=2) 530b2f1ab58SBarry Smith { 531b2f1ab58SBarry Smith printf("L^T * diagD * L=\n"); 532b2f1ab58SBarry Smith tmp = 0.0; 533b2f1ab58SBarry Smith for ( j=0; j<r_nnz; j++) 534b2f1ab58SBarry Smith { 535b2f1ab58SBarry Smith k = r_icol[j]; 536b2f1ab58SBarry Smith tmp += r_val[j] * lvec[k]; 537b2f1ab58SBarry Smith } 538b2f1ab58SBarry Smith printf(fmt, i+1,i+1,tmp); 539b2f1ab58SBarry Smith } 540b2f1ab58SBarry Smith #endif 541b2f1ab58SBarry Smith 542b2f1ab58SBarry Smith // If necessary, allocate arrays 543b2f1ab58SBarry Smith if (r_nnz==0) 544b2f1ab58SBarry Smith { 545b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 546b2f1ab58SBarry Smith if (ierr == PETSC_ERR_MEM) 547b2f1ab58SBarry Smith { 548b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, 549b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 550b2f1ab58SBarry Smith CHKERRQ(ierr); 551b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 552b2f1ab58SBarry Smith } 553b2f1ab58SBarry Smith CHKERRQ(ierr); 554b2f1ab58SBarry Smith r_icol = retval.icols[i]; 555b2f1ab58SBarry Smith r_val = retval.values[i]; 556b2f1ab58SBarry Smith } 557b2f1ab58SBarry Smith 558b2f1ab58SBarry Smith // Now, fill in 559b2f1ab58SBarry Smith r_icol[r_nnz] = i; 560b2f1ab58SBarry Smith r_val [r_nnz] = 1.0; 561b2f1ab58SBarry Smith r_nnz++; 562b2f1ab58SBarry Smith retval.row_nnz[i]++; 563b2f1ab58SBarry Smith 564b2f1ab58SBarry Smith retval.nnz += r_nnz; 565b2f1ab58SBarry Smith 566b2f1ab58SBarry Smith // Calculate 567b2f1ab58SBarry Smith // val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) 568b2f1ab58SBarry Smith for (j=1; j<p_nnz; j++) 569b2f1ab58SBarry Smith { 570b2f1ab58SBarry Smith k = i+p_icol[j]; 571b2f1ab58SBarry Smith r1_icol = retval.icols[k]; 572b2f1ab58SBarry Smith r1_val = retval.values[k]; 573b2f1ab58SBarry Smith #ifdef cholesky_debug 574b2f1ab58SBarry Smith tmp = 0.0; 575b2f1ab58SBarry Smith #endif 576b2f1ab58SBarry Smith for (jL=0; jL<retval.row_nnz[k]; jL++) 577b2f1ab58SBarry Smith { 578b2f1ab58SBarry Smith val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 579b2f1ab58SBarry Smith #ifdef cholesky_debug 580b2f1ab58SBarry Smith tmp += r1_val[jL] * lvec[r1_icol[jL]]; 581b2f1ab58SBarry Smith #endif 582b2f1ab58SBarry Smith } 583b2f1ab58SBarry Smith #ifdef cholesky_debug 584b2f1ab58SBarry Smith if (idebug>=2 && tmp!=0) {printf(fmt, k+1,i+1,tmp);} 585b2f1ab58SBarry Smith #endif 586b2f1ab58SBarry Smith val[k] /= diag[i]; 587b2f1ab58SBarry Smith 588*d36aa34eSBarry Smith if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) 589b2f1ab58SBarry Smith { 590b2f1ab58SBarry Smith // If necessary, allocate arrays 591b2f1ab58SBarry Smith if (retval.row_nnz[k]==0) 592b2f1ab58SBarry Smith { 593b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], 594b2f1ab58SBarry Smith &n_alloc_used); 595b2f1ab58SBarry Smith if (ierr == PETSC_ERR_MEM) 596b2f1ab58SBarry Smith { 597b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( 598b2f1ab58SBarry Smith &retval, i, &n_row_alloc_ok, 599b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 600b2f1ab58SBarry Smith CHKERRQ(ierr); 601b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], 602b2f1ab58SBarry Smith &n_alloc_used); 603b2f1ab58SBarry Smith r_icol = retval.icols[i]; 604b2f1ab58SBarry Smith r_val = retval.values[i]; 605b2f1ab58SBarry Smith } 606b2f1ab58SBarry Smith CHKERRQ(ierr); 607b2f1ab58SBarry Smith } 608b2f1ab58SBarry Smith 609b2f1ab58SBarry Smith // Now, fill in 610b2f1ab58SBarry Smith retval.icols[k][retval.row_nnz[k]] = i; 611b2f1ab58SBarry Smith retval.values[k][retval.row_nnz[k]] = val[k]; 612b2f1ab58SBarry Smith retval.row_nnz[k]++; 613b2f1ab58SBarry Smith } 614b2f1ab58SBarry Smith val[k] = 0; 615b2f1ab58SBarry Smith } 616b2f1ab58SBarry Smith #ifdef cholesky_debug 617b2f1ab58SBarry Smith if (idebug>=2) {printf("\n\n");} 618b2f1ab58SBarry Smith #endif 619b2f1ab58SBarry Smith 620b2f1ab58SBarry Smith // Erase the values used in the work arrays 621b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; } 622b2f1ab58SBarry Smith 623b2f1ab58SBarry Smith #ifdef cholesky_debug 624b2f1ab58SBarry Smith if (idebug>=2) 625b2f1ab58SBarry Smith { 626b2f1ab58SBarry Smith printf("new L=\n"); 627b2f1ab58SBarry Smith for (jL=i; jL<nrows; jL++) 628b2f1ab58SBarry Smith { 629b2f1ab58SBarry Smith j = retval.row_nnz[jL]-1; 630b2f1ab58SBarry Smith if (j>=0) 631b2f1ab58SBarry Smith { 632b2f1ab58SBarry Smith k = retval.icols[jL][j]; 633b2f1ab58SBarry Smith if (k==i) printf(fmt, jL+1,i+1,retval.values[jL][j]); 634b2f1ab58SBarry Smith } 635b2f1ab58SBarry Smith } 636b2f1ab58SBarry Smith printf("\n\n"); 637b2f1ab58SBarry Smith } 638b2f1ab58SBarry Smith #endif 639b2f1ab58SBarry Smith } 640b2f1ab58SBarry Smith 641b2f1ab58SBarry Smith ierr=PetscFree(lvec); CHKERRQ(ierr); ierr=PetscFree(val); CHKERRQ(ierr); 642b2f1ab58SBarry Smith 643b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, 644b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 645b2f1ab58SBarry Smith ierr=PetscFree(max_row_nnz); CHKERRQ(ierr); 646b2f1ab58SBarry Smith 647b2f1ab58SBarry Smith // Place the inverse of the diagonals in the matrix 648b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 649b2f1ab58SBarry Smith { 650b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 651b2f1ab58SBarry Smith retval.values[i][r_nnz-1] = 1.0 / diag[i]; 652b2f1ab58SBarry Smith for (j=0; j<r_nnz-1; j++) 653b2f1ab58SBarry Smith { 654b2f1ab58SBarry Smith retval.values[i][j] *= -1; 655b2f1ab58SBarry Smith } 656b2f1ab58SBarry Smith } 657b2f1ab58SBarry Smith ierr=PetscFree(diag); CHKERRQ(ierr); 658b2f1ab58SBarry Smith 659b2f1ab58SBarry Smith 660b2f1ab58SBarry Smith // Return successfully 661b2f1ab58SBarry Smith *matrix_L = retval; 662b2f1ab58SBarry Smith 663b2f1ab58SBarry Smith #ifdef cholesky_debug 664b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 665b2f1ab58SBarry Smith { 666b2f1ab58SBarry Smith printf("Row %d\n",i+1); 667b2f1ab58SBarry Smith for (j=0; j<retval.row_nnz[i]; j++) 668b2f1ab58SBarry Smith { 669b2f1ab58SBarry Smith printf(fmt, i+1,retval.icols[i][j]+1,retval.values[i][j]); 670b2f1ab58SBarry Smith } 671b2f1ab58SBarry Smith } 672b2f1ab58SBarry Smith #endif 673b2f1ab58SBarry Smith PetscFunctionReturn(0); 674b2f1ab58SBarry Smith } 675b2f1ab58SBarry Smith 676