1*b2f1ab58SBarry Smith 2*b2f1ab58SBarry Smith /* 3*b2f1ab58SBarry Smith spbas_cholesky_row_alloc: 4*b2f1ab58SBarry Smith in the data arrays, find a place where another row may be stored. 5*b2f1ab58SBarry Smith Return PETSC_ERR_MEM if there is insufficient space to store the 6*b2f1ab58SBarry Smith row, so garbage collection and/or re-allocation may be done. 7*b2f1ab58SBarry Smith */ 8*b2f1ab58SBarry Smith #undef __FUNCT__ 9*b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_row_alloc" 10*b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11*b2f1ab58SBarry Smith { 12*b2f1ab58SBarry Smith PetscFunctionBegin; 13*b2f1ab58SBarry Smith retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14*b2f1ab58SBarry Smith retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15*b2f1ab58SBarry Smith *n_alloc_used += r_nnz; 16*b2f1ab58SBarry Smith if (*n_alloc_used > retval.n_alloc_icol) 17*b2f1ab58SBarry Smith { 18*b2f1ab58SBarry Smith PetscFunctionReturn(PETSC_ERR_MEM); 19*b2f1ab58SBarry Smith } 20*b2f1ab58SBarry Smith else 21*b2f1ab58SBarry Smith { 22*b2f1ab58SBarry Smith PetscFunctionReturn(0); 23*b2f1ab58SBarry Smith } 24*b2f1ab58SBarry Smith } 25*b2f1ab58SBarry Smith 26*b2f1ab58SBarry Smith 27*b2f1ab58SBarry Smith /* 28*b2f1ab58SBarry Smith spbas_cholesky_garbage_collect: 29*b2f1ab58SBarry Smith move the rows which have been calculated so far, as well as 30*b2f1ab58SBarry Smith those currently under construction, to the front of the 31*b2f1ab58SBarry Smith array, while putting them in the proper order. 32*b2f1ab58SBarry Smith When it seems necessary, enlarge the current arrays. 33*b2f1ab58SBarry Smith 34*b2f1ab58SBarry Smith NB: re-allocation is being simulated using 35*b2f1ab58SBarry Smith PetscMalloc, memcpy, PetscFree, because 36*b2f1ab58SBarry Smith PetscRealloc does not seem to exist. 37*b2f1ab58SBarry Smith 38*b2f1ab58SBarry Smith */ 39*b2f1ab58SBarry Smith #undef __FUNCT__ 40*b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_garbage_collect" 41*b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_garbage_collect( 42*b2f1ab58SBarry Smith spbas_matrix * result, // I/O: the Cholesky factor matrix being constructed. 43*b2f1ab58SBarry Smith // Only the storage, not the contents of this matrix 44*b2f1ab58SBarry Smith // is changed in this function 45*b2f1ab58SBarry Smith PetscInt i_row, // I : Number of rows for which the final contents are 46*b2f1ab58SBarry Smith // known 47*b2f1ab58SBarry Smith PetscInt *n_row_alloc_ok, // I/O: Number of rows which are already in their final 48*b2f1ab58SBarry Smith // places in the arrays: they need not be moved any more 49*b2f1ab58SBarry Smith PetscInt *n_alloc_used, // I/O: 50*b2f1ab58SBarry Smith PetscInt *max_row_nnz // I : Over-estimate of the number of nonzeros needed to 51*b2f1ab58SBarry Smith // store each row 52*b2f1ab58SBarry Smith ) 53*b2f1ab58SBarry Smith { 54*b2f1ab58SBarry Smith 55*b2f1ab58SBarry Smith #undef garbage_debug 56*b2f1ab58SBarry Smith 57*b2f1ab58SBarry Smith // PSEUDO-CODE: 58*b2f1ab58SBarry Smith // 1. Choose the appropriate size for the arrays 59*b2f1ab58SBarry Smith // 2. Rescue the arrays which would be lost during garbage collection 60*b2f1ab58SBarry Smith // 3. Reallocate and correct administration 61*b2f1ab58SBarry Smith // 4. Move all arrays so that they are in proper order 62*b2f1ab58SBarry Smith 63*b2f1ab58SBarry Smith PetscInt i,j; 64*b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 65*b2f1ab58SBarry Smith PetscInt n_alloc_ok=0; 66*b2f1ab58SBarry Smith PetscInt n_alloc_ok_max=0; 67*b2f1ab58SBarry Smith 68*b2f1ab58SBarry Smith PetscErrorCode ierr; 69*b2f1ab58SBarry Smith PetscInt need_already= 0; 70*b2f1ab58SBarry Smith PetscInt n_rows_ahead=0; 71*b2f1ab58SBarry Smith PetscInt max_need_extra= 0; 72*b2f1ab58SBarry Smith PetscInt n_alloc_max, n_alloc_est, n_alloc; 73*b2f1ab58SBarry Smith PetscInt n_alloc_now = result->n_alloc_icol; 74*b2f1ab58SBarry Smith PetscInt *alloc_icol_old = result->alloc_icol; 75*b2f1ab58SBarry Smith PetscScalar *alloc_val_old = result->alloc_val; 76*b2f1ab58SBarry Smith PetscInt *icol_rescue; 77*b2f1ab58SBarry Smith PetscScalar *val_rescue; 78*b2f1ab58SBarry Smith PetscInt n_rescue; 79*b2f1ab58SBarry Smith PetscInt n_row_rescue; 80*b2f1ab58SBarry Smith PetscInt i_here, i_last, n_copy; 81*b2f1ab58SBarry Smith const PetscScalar xtra_perc = 20; 82*b2f1ab58SBarry Smith 83*b2f1ab58SBarry Smith PetscFunctionBegin; 84*b2f1ab58SBarry Smith 85*b2f1ab58SBarry Smith //********************************************************* 86*b2f1ab58SBarry Smith // 1. Choose appropriate array size 87*b2f1ab58SBarry Smith // Count number of rows and memory usage which is already final 88*b2f1ab58SBarry Smith for (i=0;i<i_row; i++) 89*b2f1ab58SBarry Smith { 90*b2f1ab58SBarry Smith n_alloc_ok += result->row_nnz[i]; 91*b2f1ab58SBarry Smith n_alloc_ok_max += max_row_nnz[i]; 92*b2f1ab58SBarry Smith } 93*b2f1ab58SBarry Smith 94*b2f1ab58SBarry Smith // Count currently needed memory usage and future memory requirements 95*b2f1ab58SBarry Smith // (max, predicted) 96*b2f1ab58SBarry Smith for (i=i_row; i<nrows; i++) 97*b2f1ab58SBarry Smith { 98*b2f1ab58SBarry Smith if (result->row_nnz[i]==0) 99*b2f1ab58SBarry Smith { 100*b2f1ab58SBarry Smith max_need_extra += max_row_nnz[i]; 101*b2f1ab58SBarry Smith } 102*b2f1ab58SBarry Smith else 103*b2f1ab58SBarry Smith { 104*b2f1ab58SBarry Smith need_already += max_row_nnz[i]; 105*b2f1ab58SBarry Smith n_rows_ahead++; 106*b2f1ab58SBarry Smith } 107*b2f1ab58SBarry Smith } 108*b2f1ab58SBarry Smith 109*b2f1ab58SBarry Smith // Make maximal and realistic memory requirement estimates 110*b2f1ab58SBarry Smith n_alloc_max = n_alloc_ok + need_already + max_need_extra; 111*b2f1ab58SBarry Smith n_alloc_est = n_alloc_ok + need_already + (int) 112*b2f1ab58SBarry Smith ((PetscScalar) max_need_extra * 113*b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok / 114*b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok_max); 115*b2f1ab58SBarry Smith 116*b2f1ab58SBarry Smith // Choose array sizes 117*b2f1ab58SBarry Smith if (n_alloc_max == n_alloc_est) 118*b2f1ab58SBarry Smith { n_alloc = n_alloc_max; } 119*b2f1ab58SBarry Smith else if (n_alloc_now >= n_alloc_est) 120*b2f1ab58SBarry Smith { n_alloc = n_alloc_now; } 121*b2f1ab58SBarry Smith else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) 122*b2f1ab58SBarry Smith { n_alloc = n_alloc_max; } 123*b2f1ab58SBarry Smith else 124*b2f1ab58SBarry Smith { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); } 125*b2f1ab58SBarry Smith 126*b2f1ab58SBarry Smith // If new estimate is less than what we already have, 127*b2f1ab58SBarry Smith // don't reallocate, just garbage-collect 128*b2f1ab58SBarry Smith if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) 129*b2f1ab58SBarry Smith { 130*b2f1ab58SBarry Smith n_alloc = result->n_alloc_icol; 131*b2f1ab58SBarry Smith } 132*b2f1ab58SBarry Smith 133*b2f1ab58SBarry Smith #ifdef garbage_debug 134*b2f1ab58SBarry Smith // Print allocation considerations 135*b2f1ab58SBarry Smith printf(" + Final version of rows 1..%d is available\n",i_row); 136*b2f1ab58SBarry Smith printf(" They have %d nonzeros: %6.2f per row\n",n_alloc_ok, 137*b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok / (PetscScalar)i_row); 138*b2f1ab58SBarry Smith printf(" Without droptol, I would have had %d nonzeros\n", 139*b2f1ab58SBarry Smith n_alloc_ok_max); 140*b2f1ab58SBarry Smith printf(" So I use %6.2f%% of the allowed sparseness\n", 141*b2f1ab58SBarry Smith 100.0 * (PetscScalar) n_alloc_ok / 142*b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok_max); 143*b2f1ab58SBarry Smith printf(" + I have %d rows for which I need full sparseness pattern\n", 144*b2f1ab58SBarry Smith n_rows_ahead); 145*b2f1ab58SBarry Smith printf(" They require %d nonzeros\n", 146*b2f1ab58SBarry Smith need_already); 147*b2f1ab58SBarry Smith printf(" + After that, I shall need %d more rows\n", 148*b2f1ab58SBarry Smith nrows-n_rows_ahead-i_row); 149*b2f1ab58SBarry Smith printf(" Maximally, I will need %d nonzeros there\n", 150*b2f1ab58SBarry Smith max_need_extra); 151*b2f1ab58SBarry Smith printf(" Realistically, I expect to need %6.2f nonzeros there\n", 152*b2f1ab58SBarry Smith (PetscScalar) max_need_extra * 153*b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok / 154*b2f1ab58SBarry Smith (PetscScalar) n_alloc_ok_max); 155*b2f1ab58SBarry Smith printf(" + New allocated arrays should be %d (max) or %d (realistic) long\n", 156*b2f1ab58SBarry Smith n_alloc_max, n_alloc_est); 157*b2f1ab58SBarry Smith 158*b2f1ab58SBarry Smith #endif 159*b2f1ab58SBarry Smith // Motivate dimension choice 160*b2f1ab58SBarry Smith printf(" Allocating %d nonzeros: ",n_alloc); 161*b2f1ab58SBarry Smith if (n_alloc_max == n_alloc_est) 162*b2f1ab58SBarry Smith { printf("this is the correct size\n"); } 163*b2f1ab58SBarry Smith else if (n_alloc_now >= n_alloc_est) 164*b2f1ab58SBarry Smith { printf("the current size, which seems enough\n"); } 165*b2f1ab58SBarry Smith else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) 166*b2f1ab58SBarry Smith { printf("the maximum estimate\n"); } 167*b2f1ab58SBarry Smith else 168*b2f1ab58SBarry Smith { printf("%6.2f %% more than the estimate\n",xtra_perc); } 169*b2f1ab58SBarry Smith 170*b2f1ab58SBarry Smith 171*b2f1ab58SBarry Smith //********************************************************* 172*b2f1ab58SBarry Smith // 2. Rescue arrays which would be lost 173*b2f1ab58SBarry Smith // Count how many rows and nonzeros will have to be rescued 174*b2f1ab58SBarry Smith // when moving all arrays in place 175*b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 176*b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 177*b2f1ab58SBarry Smith else 178*b2f1ab58SBarry Smith { 179*b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 180*b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 181*b2f1ab58SBarry Smith result->row_nnz[i]; 182*b2f1ab58SBarry Smith } 183*b2f1ab58SBarry Smith 184*b2f1ab58SBarry Smith #ifdef garbage_debug 185*b2f1ab58SBarry Smith printf("\n\n"); 186*b2f1ab58SBarry Smith printf("Rows 0..%d are already OK\n", *n_row_alloc_ok); 187*b2f1ab58SBarry Smith printf("Rows 0..%d have the right dimensions\n", i_row-1); 188*b2f1ab58SBarry Smith #endif 189*b2f1ab58SBarry Smith 190*b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 191*b2f1ab58SBarry Smith { 192*b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 193*b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 194*b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 195*b2f1ab58SBarry Smith { 196*b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 197*b2f1ab58SBarry Smith { 198*b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 199*b2f1ab58SBarry Smith n_row_rescue++; 200*b2f1ab58SBarry Smith //printf(" Rescue row %d: %d nonzeros\n",i,result->row_nnz[i]); 201*b2f1ab58SBarry Smith //printf(" (New start at %d, old start at %d)\n", 202*b2f1ab58SBarry Smith // *n_alloc_used, i_here); 203*b2f1ab58SBarry Smith } 204*b2f1ab58SBarry Smith 205*b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 206*b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 207*b2f1ab58SBarry Smith } 208*b2f1ab58SBarry Smith } 209*b2f1ab58SBarry Smith 210*b2f1ab58SBarry Smith #ifdef garbage_debug 211*b2f1ab58SBarry Smith printf(" + %d arrays will have to be rescued: %d nnz\n", 212*b2f1ab58SBarry Smith n_row_rescue, n_rescue); 213*b2f1ab58SBarry Smith #endif 214*b2f1ab58SBarry Smith 215*b2f1ab58SBarry Smith // Allocate rescue arrays 216*b2f1ab58SBarry Smith ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue); 217*b2f1ab58SBarry Smith CHKERRQ(ierr); 218*b2f1ab58SBarry Smith ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue); 219*b2f1ab58SBarry Smith CHKERRQ(ierr); 220*b2f1ab58SBarry Smith 221*b2f1ab58SBarry Smith // Rescue the arrays which need rescuing 222*b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 223*b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 224*b2f1ab58SBarry Smith else 225*b2f1ab58SBarry Smith { 226*b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 227*b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 228*b2f1ab58SBarry Smith result->row_nnz[i]; 229*b2f1ab58SBarry Smith } 230*b2f1ab58SBarry Smith 231*b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 232*b2f1ab58SBarry Smith { 233*b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 234*b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 235*b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 236*b2f1ab58SBarry Smith { 237*b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 238*b2f1ab58SBarry Smith { 239*b2f1ab58SBarry Smith //printf(" Rescuing row %d to array[%d..]\n",i,n_rescue); 240*b2f1ab58SBarry Smith memcpy((void*) &icol_rescue[n_rescue], 241*b2f1ab58SBarry Smith (void*) result->icols[i], 242*b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(int)); 243*b2f1ab58SBarry Smith memcpy((void*) &val_rescue[n_rescue], 244*b2f1ab58SBarry Smith (void*) result->values[i], 245*b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(PetscScalar)); 246*b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 247*b2f1ab58SBarry Smith n_row_rescue++; 248*b2f1ab58SBarry Smith } 249*b2f1ab58SBarry Smith 250*b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 251*b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 252*b2f1ab58SBarry Smith } 253*b2f1ab58SBarry Smith } 254*b2f1ab58SBarry Smith 255*b2f1ab58SBarry Smith //********************************************************* 256*b2f1ab58SBarry Smith // 3. Reallocate and correct administration 257*b2f1ab58SBarry Smith 258*b2f1ab58SBarry Smith if (n_alloc != result->n_alloc_icol) 259*b2f1ab58SBarry Smith { 260*b2f1ab58SBarry Smith n_copy = MIN(n_alloc,result->n_alloc_icol); 261*b2f1ab58SBarry Smith 262*b2f1ab58SBarry Smith // PETSC knows no REALLOC, so we'll REALLOC ourselves. 263*b2f1ab58SBarry Smith 264*b2f1ab58SBarry Smith // Allocate new icol-data, copy old contents 265*b2f1ab58SBarry Smith ierr = PetscMalloc( n_alloc * sizeof(int), 266*b2f1ab58SBarry Smith &result->alloc_icol); CHKERRQ(ierr); 267*b2f1ab58SBarry Smith memcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int)); 268*b2f1ab58SBarry Smith 269*b2f1ab58SBarry Smith // Update administration, Reset pointers to new arrays 270*b2f1ab58SBarry Smith result->n_alloc_icol = n_alloc; 271*b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 272*b2f1ab58SBarry Smith { 273*b2f1ab58SBarry Smith result->icols[i] = result->alloc_icol + 274*b2f1ab58SBarry Smith (result->icols[i] - alloc_icol_old); 275*b2f1ab58SBarry Smith result->values[i] = result->alloc_val + 276*b2f1ab58SBarry Smith (result->icols[i] - result->alloc_icol); 277*b2f1ab58SBarry Smith } 278*b2f1ab58SBarry Smith 279*b2f1ab58SBarry Smith // Delete old array 280*b2f1ab58SBarry Smith ierr = PetscFree(alloc_icol_old); CHKERRQ(ierr); 281*b2f1ab58SBarry Smith 282*b2f1ab58SBarry Smith // Allocate new value-data, copy old contents 283*b2f1ab58SBarry Smith ierr = PetscMalloc( n_alloc * sizeof(PetscScalar), 284*b2f1ab58SBarry Smith &result->alloc_val); CHKERRQ(ierr); 285*b2f1ab58SBarry Smith memcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar)); 286*b2f1ab58SBarry Smith 287*b2f1ab58SBarry Smith // Update administration, Reset pointers to new arrays 288*b2f1ab58SBarry Smith result->n_alloc_val = n_alloc; 289*b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 290*b2f1ab58SBarry Smith { 291*b2f1ab58SBarry Smith result->values[i] = result->alloc_val + 292*b2f1ab58SBarry Smith (result->icols[i] - result->alloc_icol); 293*b2f1ab58SBarry Smith } 294*b2f1ab58SBarry Smith 295*b2f1ab58SBarry Smith // Delete old array 296*b2f1ab58SBarry Smith ierr = PetscFree(alloc_val_old); CHKERRQ(ierr); 297*b2f1ab58SBarry Smith } 298*b2f1ab58SBarry Smith 299*b2f1ab58SBarry Smith 300*b2f1ab58SBarry Smith //********************************************************* 301*b2f1ab58SBarry Smith // 4. Copy all the arrays to their proper places 302*b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 303*b2f1ab58SBarry Smith if (*n_row_alloc_ok==0) { *n_alloc_used = 0; } 304*b2f1ab58SBarry Smith else 305*b2f1ab58SBarry Smith { 306*b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 307*b2f1ab58SBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + 308*b2f1ab58SBarry Smith result->row_nnz[i]; 309*b2f1ab58SBarry Smith } 310*b2f1ab58SBarry Smith 311*b2f1ab58SBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) 312*b2f1ab58SBarry Smith { 313*b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 314*b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 315*b2f1ab58SBarry Smith 316*b2f1ab58SBarry Smith result->icols[i] = result->alloc_icol + *n_alloc_used; 317*b2f1ab58SBarry Smith result->values[i]= result->alloc_val + *n_alloc_used; 318*b2f1ab58SBarry Smith 319*b2f1ab58SBarry Smith if (result->row_nnz[i]>0) 320*b2f1ab58SBarry Smith { 321*b2f1ab58SBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) 322*b2f1ab58SBarry Smith { 323*b2f1ab58SBarry Smith //printf(" Copying rescued array[%d..] to row %d\n", 324*b2f1ab58SBarry Smith // n_rescue,i); 325*b2f1ab58SBarry Smith //fflush(stdout); 326*b2f1ab58SBarry Smith memcpy((void*) result->icols[i], 327*b2f1ab58SBarry Smith (void*) &icol_rescue[n_rescue], 328*b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(int)); 329*b2f1ab58SBarry Smith memcpy((void*) result->values[i], 330*b2f1ab58SBarry Smith (void*) &val_rescue[n_rescue], 331*b2f1ab58SBarry Smith result->row_nnz[i] * sizeof(PetscScalar)); 332*b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 333*b2f1ab58SBarry Smith n_row_rescue++; 334*b2f1ab58SBarry Smith } 335*b2f1ab58SBarry Smith else 336*b2f1ab58SBarry Smith { 337*b2f1ab58SBarry Smith for (j=0; j<result->row_nnz[i]; j++) 338*b2f1ab58SBarry Smith { 339*b2f1ab58SBarry Smith result->icols[i][j] = result->alloc_icol[i_here+j]; 340*b2f1ab58SBarry Smith result->values[i][j] = result->alloc_val[ i_here+j]; 341*b2f1ab58SBarry Smith } 342*b2f1ab58SBarry Smith } 343*b2f1ab58SBarry Smith if (i<i_row) { *n_alloc_used += result->row_nnz[i];} 344*b2f1ab58SBarry Smith else { *n_alloc_used += max_row_nnz[i];} 345*b2f1ab58SBarry Smith } 346*b2f1ab58SBarry Smith } 347*b2f1ab58SBarry Smith 348*b2f1ab58SBarry Smith // Delete the rescue arrays 349*b2f1ab58SBarry Smith ierr = PetscFree(icol_rescue); CHKERRQ(ierr); 350*b2f1ab58SBarry Smith ierr = PetscFree(val_rescue); CHKERRQ(ierr); 351*b2f1ab58SBarry Smith *n_row_alloc_ok = i_row; 352*b2f1ab58SBarry Smith 353*b2f1ab58SBarry Smith PetscFunctionReturn(0); 354*b2f1ab58SBarry Smith 355*b2f1ab58SBarry Smith } 356*b2f1ab58SBarry Smith 357*b2f1ab58SBarry Smith /* 358*b2f1ab58SBarry Smith spbas_incomplete_cholesky: 359*b2f1ab58SBarry Smith incomplete Cholesky decomposition of a square, symmetric, 360*b2f1ab58SBarry Smith positive definite matrix. 361*b2f1ab58SBarry Smith 362*b2f1ab58SBarry Smith In case negative diagonals are encountered, function returns 363*b2f1ab58SBarry Smith NEGATIVE_DIAGONAL. When this happens, call this function again 364*b2f1ab58SBarry Smith with a larger epsdiag_in, a less sparse pattern, and/or a smaller 365*b2f1ab58SBarry Smith droptol 366*b2f1ab58SBarry Smith */ 367*b2f1ab58SBarry Smith #undef __FUNCT__ 368*b2f1ab58SBarry Smith #define __FUNCT__ "spbas_incomplete_cholesky" 369*b2f1ab58SBarry Smith PetscErrorCode spbas_incomplete_cholesky( 370*b2f1ab58SBarry Smith Mat A, const PetscInt *rip, const PetscInt *riip, 371*b2f1ab58SBarry Smith spbas_matrix pattern, 372*b2f1ab58SBarry Smith PetscScalar droptol, PetscScalar epsdiag_in, 373*b2f1ab58SBarry Smith spbas_matrix * matrix_L) 374*b2f1ab58SBarry Smith { 375*b2f1ab58SBarry Smith 376*b2f1ab58SBarry Smith #undef cholesky_debug 377*b2f1ab58SBarry Smith 378*b2f1ab58SBarry Smith #ifdef cholesky_debug 379*b2f1ab58SBarry Smith PetscInt idebug=0; 380*b2f1ab58SBarry Smith const char * fmt= " (%d,%d) -> %12.8e\n"; 381*b2f1ab58SBarry Smith PetscScalar tmp; 382*b2f1ab58SBarry Smith #endif 383*b2f1ab58SBarry Smith 384*b2f1ab58SBarry Smith PetscInt jL; 385*b2f1ab58SBarry Smith Mat_SeqAIJ *a=(Mat_SeqAIJ*)A->data; 386*b2f1ab58SBarry Smith PetscInt *ai=a->i,*aj=a->j; 387*b2f1ab58SBarry Smith MatScalar *aa=a->a; 388*b2f1ab58SBarry Smith PetscInt nrows, ncols; 389*b2f1ab58SBarry Smith PetscInt *max_row_nnz; 390*b2f1ab58SBarry Smith PetscErrorCode ierr; 391*b2f1ab58SBarry Smith spbas_matrix retval; 392*b2f1ab58SBarry Smith PetscScalar * diag; 393*b2f1ab58SBarry Smith PetscScalar * val; 394*b2f1ab58SBarry Smith PetscScalar * lvec; 395*b2f1ab58SBarry Smith PetscScalar epsdiag; 396*b2f1ab58SBarry Smith PetscInt i,j,k; 397*b2f1ab58SBarry Smith const PetscTruth do_values = PETSC_TRUE; 398*b2f1ab58SBarry Smith PetscInt * r1_icol; PetscScalar *r1_val; 399*b2f1ab58SBarry Smith PetscInt * r_icol; PetscInt r_nnz; PetscScalar *r_val; 400*b2f1ab58SBarry Smith PetscInt * A_icol; PetscInt A_nnz; PetscScalar *A_val; 401*b2f1ab58SBarry Smith PetscInt * p_icol; PetscInt p_nnz; 402*b2f1ab58SBarry Smith PetscInt n_row_alloc_ok = 0; // number of rows which have been stored 403*b2f1ab58SBarry Smith // correctly in the matrix 404*b2f1ab58SBarry Smith PetscInt n_alloc_used = 0; // part of result->icols and result->values 405*b2f1ab58SBarry Smith // which is currently being used 406*b2f1ab58SBarry Smith PetscFunctionBegin; 407*b2f1ab58SBarry Smith 408*b2f1ab58SBarry Smith // Convert the Manteuffel shift from 'fraction of average diagonal' to 409*b2f1ab58SBarry Smith // dimensioned value 410*b2f1ab58SBarry Smith ierr = MatGetSize(A, &nrows, &ncols); CHKERRQ(ierr); 411*b2f1ab58SBarry Smith ierr = MatGetTrace(A, &epsdiag); CHKERRQ(ierr); 412*b2f1ab58SBarry Smith epsdiag *= epsdiag_in / nrows; 413*b2f1ab58SBarry Smith printf(" Dimensioned Manteuffel shift=%e\n", epsdiag); 414*b2f1ab58SBarry Smith printf(" Drop tolerance =%e\n",droptol); 415*b2f1ab58SBarry Smith 416*b2f1ab58SBarry Smith if (droptol<1e-10) {droptol=1e-10;} 417*b2f1ab58SBarry Smith 418*b2f1ab58SBarry Smith // Check dimensions 419*b2f1ab58SBarry Smith if ( (nrows != pattern.nrows) || 420*b2f1ab58SBarry Smith (ncols != pattern.ncols) || 421*b2f1ab58SBarry Smith (ncols != nrows) ) 422*b2f1ab58SBarry Smith { 423*b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_ARG_INCOMP, 424*b2f1ab58SBarry Smith "Dimension error in spbas_incomplete_cholesky\n"); 425*b2f1ab58SBarry Smith } 426*b2f1ab58SBarry Smith 427*b2f1ab58SBarry Smith // Set overall values 428*b2f1ab58SBarry Smith retval.nrows = nrows; 429*b2f1ab58SBarry Smith retval.ncols = nrows; 430*b2f1ab58SBarry Smith retval.nnz = pattern.nnz/10; 431*b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 432*b2f1ab58SBarry Smith retval.block_data = PETSC_TRUE; 433*b2f1ab58SBarry Smith 434*b2f1ab58SBarry Smith // Allocate sparseness pattern 435*b2f1ab58SBarry Smith ierr = spbas_allocate_pattern(&retval, do_values); CHKERRQ(ierr); 436*b2f1ab58SBarry Smith // Initial allocation of data 437*b2f1ab58SBarry Smith memset((void*) retval.row_nnz, 0, nrows*sizeof(int)); 438*b2f1ab58SBarry Smith ierr = spbas_allocate_data(&retval); CHKERRQ(ierr); 439*b2f1ab58SBarry Smith retval.nnz = 0; 440*b2f1ab58SBarry Smith 441*b2f1ab58SBarry Smith // Allocate work arrays 442*b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag); CHKERRQ(ierr); 443*b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val); CHKERRQ(ierr); 444*b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec); CHKERRQ(ierr); 445*b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz); CHKERRQ(ierr); 446*b2f1ab58SBarry Smith if (!(diag && val && lvec && max_row_nnz)) 447*b2f1ab58SBarry Smith { 448*b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_MEM, 449*b2f1ab58SBarry Smith "Allocation error in spbas_incomplete_cholesky\n"); 450*b2f1ab58SBarry Smith } 451*b2f1ab58SBarry Smith 452*b2f1ab58SBarry Smith memset((void*) val, 0, nrows*sizeof(PetscScalar)); 453*b2f1ab58SBarry Smith memset((void*) lvec, 0, nrows*sizeof(PetscScalar)); 454*b2f1ab58SBarry Smith memset((void*) max_row_nnz, 0, nrows*sizeof(int)); 455*b2f1ab58SBarry Smith 456*b2f1ab58SBarry Smith // Check correct format of sparseness pattern 457*b2f1ab58SBarry Smith if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) 458*b2f1ab58SBarry Smith { 459*b2f1ab58SBarry Smith SETERRQ( PETSC_ERR_SUP_SYS, 460*b2f1ab58SBarry Smith "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 461*b2f1ab58SBarry Smith } 462*b2f1ab58SBarry Smith 463*b2f1ab58SBarry Smith // Count the nonzeros on transpose of pattern 464*b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 465*b2f1ab58SBarry Smith { 466*b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 467*b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 468*b2f1ab58SBarry Smith for (j=0; j<p_nnz; j++) 469*b2f1ab58SBarry Smith { 470*b2f1ab58SBarry Smith max_row_nnz[i+p_icol[j]]++; 471*b2f1ab58SBarry Smith } 472*b2f1ab58SBarry Smith } 473*b2f1ab58SBarry Smith 474*b2f1ab58SBarry Smith // Calculate rows of L 475*b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 476*b2f1ab58SBarry Smith { 477*b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 478*b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 479*b2f1ab58SBarry Smith 480*b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 481*b2f1ab58SBarry Smith r_icol = retval.icols[i]; 482*b2f1ab58SBarry Smith r_val = retval.values[i]; 483*b2f1ab58SBarry Smith A_nnz = ai[rip[i]+1] - ai[rip[i]]; 484*b2f1ab58SBarry Smith A_icol = &aj[ai[rip[i]]]; 485*b2f1ab58SBarry Smith A_val = &aa[ai[rip[i]]]; 486*b2f1ab58SBarry Smith 487*b2f1ab58SBarry Smith // Calculate val += A(i,i:n)'; 488*b2f1ab58SBarry Smith for (j=0; j<A_nnz; j++) 489*b2f1ab58SBarry Smith { 490*b2f1ab58SBarry Smith k = riip[A_icol[j]]; 491*b2f1ab58SBarry Smith if (k>=i) { val[k] = A_val[j]; } 492*b2f1ab58SBarry Smith } 493*b2f1ab58SBarry Smith 494*b2f1ab58SBarry Smith // Add regularization 495*b2f1ab58SBarry Smith val[i] += epsdiag; 496*b2f1ab58SBarry Smith 497*b2f1ab58SBarry Smith // Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 498*b2f1ab58SBarry Smith // val(i) = A(i,i) - L(0:i-1,i)' * lvec 499*b2f1ab58SBarry Smith for ( j=0; j<r_nnz; j++) 500*b2f1ab58SBarry Smith { 501*b2f1ab58SBarry Smith k = r_icol[j]; 502*b2f1ab58SBarry Smith lvec[k] = diag[k] * r_val[j]; 503*b2f1ab58SBarry Smith val[i] -= r_val[j] * lvec[k]; 504*b2f1ab58SBarry Smith } 505*b2f1ab58SBarry Smith 506*b2f1ab58SBarry Smith // Calculate the new diagonal 507*b2f1ab58SBarry Smith diag[i] = val[i]; 508*b2f1ab58SBarry Smith if (diag[i]<droptol) 509*b2f1ab58SBarry Smith { 510*b2f1ab58SBarry Smith printf("Error in spbas_incomplete_cholesky:\n"); 511*b2f1ab58SBarry Smith printf("Negative diagonal in row %d\n",i+1); 512*b2f1ab58SBarry Smith 513*b2f1ab58SBarry Smith // Delete the whole matrix at once. 514*b2f1ab58SBarry Smith ierr = spbas_delete(retval); 515*b2f1ab58SBarry Smith return NEGATIVE_DIAGONAL; 516*b2f1ab58SBarry Smith } 517*b2f1ab58SBarry Smith 518*b2f1ab58SBarry Smith #ifdef cholesky_debug 519*b2f1ab58SBarry Smith if (idebug>=3) 520*b2f1ab58SBarry Smith { 521*b2f1ab58SBarry Smith printf("diagD * L (%d,:)=\n",i+1); 522*b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) 523*b2f1ab58SBarry Smith { 524*b2f1ab58SBarry Smith printf(fmt,r_icol[j]+1,i+1,lvec[r_icol[j]]); 525*b2f1ab58SBarry Smith } 526*b2f1ab58SBarry Smith printf("\n\n"); 527*b2f1ab58SBarry Smith } 528*b2f1ab58SBarry Smith 529*b2f1ab58SBarry Smith if (idebug>=1) { printf("Diag = %12.8e\n\n\n",diag[i]);} 530*b2f1ab58SBarry Smith 531*b2f1ab58SBarry Smith if (idebug>=2) 532*b2f1ab58SBarry Smith { 533*b2f1ab58SBarry Smith printf("L^T * diagD * L=\n"); 534*b2f1ab58SBarry Smith tmp = 0.0; 535*b2f1ab58SBarry Smith for ( j=0; j<r_nnz; j++) 536*b2f1ab58SBarry Smith { 537*b2f1ab58SBarry Smith k = r_icol[j]; 538*b2f1ab58SBarry Smith tmp += r_val[j] * lvec[k]; 539*b2f1ab58SBarry Smith } 540*b2f1ab58SBarry Smith printf(fmt, i+1,i+1,tmp); 541*b2f1ab58SBarry Smith } 542*b2f1ab58SBarry Smith #endif 543*b2f1ab58SBarry Smith 544*b2f1ab58SBarry Smith // If necessary, allocate arrays 545*b2f1ab58SBarry Smith if (r_nnz==0) 546*b2f1ab58SBarry Smith { 547*b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 548*b2f1ab58SBarry Smith if (ierr == PETSC_ERR_MEM) 549*b2f1ab58SBarry Smith { 550*b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( &retval, i, &n_row_alloc_ok, 551*b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 552*b2f1ab58SBarry Smith CHKERRQ(ierr); 553*b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used); 554*b2f1ab58SBarry Smith } 555*b2f1ab58SBarry Smith CHKERRQ(ierr); 556*b2f1ab58SBarry Smith r_icol = retval.icols[i]; 557*b2f1ab58SBarry Smith r_val = retval.values[i]; 558*b2f1ab58SBarry Smith } 559*b2f1ab58SBarry Smith 560*b2f1ab58SBarry Smith // Now, fill in 561*b2f1ab58SBarry Smith r_icol[r_nnz] = i; 562*b2f1ab58SBarry Smith r_val [r_nnz] = 1.0; 563*b2f1ab58SBarry Smith r_nnz++; 564*b2f1ab58SBarry Smith retval.row_nnz[i]++; 565*b2f1ab58SBarry Smith 566*b2f1ab58SBarry Smith retval.nnz += r_nnz; 567*b2f1ab58SBarry Smith 568*b2f1ab58SBarry Smith // Calculate 569*b2f1ab58SBarry Smith // val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) 570*b2f1ab58SBarry Smith for (j=1; j<p_nnz; j++) 571*b2f1ab58SBarry Smith { 572*b2f1ab58SBarry Smith k = i+p_icol[j]; 573*b2f1ab58SBarry Smith r1_icol = retval.icols[k]; 574*b2f1ab58SBarry Smith r1_val = retval.values[k]; 575*b2f1ab58SBarry Smith #ifdef cholesky_debug 576*b2f1ab58SBarry Smith tmp = 0.0; 577*b2f1ab58SBarry Smith #endif 578*b2f1ab58SBarry Smith for (jL=0; jL<retval.row_nnz[k]; jL++) 579*b2f1ab58SBarry Smith { 580*b2f1ab58SBarry Smith val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 581*b2f1ab58SBarry Smith #ifdef cholesky_debug 582*b2f1ab58SBarry Smith tmp += r1_val[jL] * lvec[r1_icol[jL]]; 583*b2f1ab58SBarry Smith #endif 584*b2f1ab58SBarry Smith } 585*b2f1ab58SBarry Smith #ifdef cholesky_debug 586*b2f1ab58SBarry Smith if (idebug>=2 && tmp!=0) {printf(fmt, k+1,i+1,tmp);} 587*b2f1ab58SBarry Smith #endif 588*b2f1ab58SBarry Smith val[k] /= diag[i]; 589*b2f1ab58SBarry Smith 590*b2f1ab58SBarry Smith if (val[k] > droptol || val[k]< -droptol) 591*b2f1ab58SBarry Smith { 592*b2f1ab58SBarry Smith // If necessary, allocate arrays 593*b2f1ab58SBarry Smith if (retval.row_nnz[k]==0) 594*b2f1ab58SBarry Smith { 595*b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], 596*b2f1ab58SBarry Smith &n_alloc_used); 597*b2f1ab58SBarry Smith if (ierr == PETSC_ERR_MEM) 598*b2f1ab58SBarry Smith { 599*b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( 600*b2f1ab58SBarry Smith &retval, i, &n_row_alloc_ok, 601*b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 602*b2f1ab58SBarry Smith CHKERRQ(ierr); 603*b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], 604*b2f1ab58SBarry Smith &n_alloc_used); 605*b2f1ab58SBarry Smith r_icol = retval.icols[i]; 606*b2f1ab58SBarry Smith r_val = retval.values[i]; 607*b2f1ab58SBarry Smith } 608*b2f1ab58SBarry Smith CHKERRQ(ierr); 609*b2f1ab58SBarry Smith } 610*b2f1ab58SBarry Smith 611*b2f1ab58SBarry Smith // Now, fill in 612*b2f1ab58SBarry Smith retval.icols[k][retval.row_nnz[k]] = i; 613*b2f1ab58SBarry Smith retval.values[k][retval.row_nnz[k]] = val[k]; 614*b2f1ab58SBarry Smith retval.row_nnz[k]++; 615*b2f1ab58SBarry Smith } 616*b2f1ab58SBarry Smith val[k] = 0; 617*b2f1ab58SBarry Smith } 618*b2f1ab58SBarry Smith #ifdef cholesky_debug 619*b2f1ab58SBarry Smith if (idebug>=2) {printf("\n\n");} 620*b2f1ab58SBarry Smith #endif 621*b2f1ab58SBarry Smith 622*b2f1ab58SBarry Smith // Erase the values used in the work arrays 623*b2f1ab58SBarry Smith for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; } 624*b2f1ab58SBarry Smith 625*b2f1ab58SBarry Smith #ifdef cholesky_debug 626*b2f1ab58SBarry Smith if (idebug>=2) 627*b2f1ab58SBarry Smith { 628*b2f1ab58SBarry Smith printf("new L=\n"); 629*b2f1ab58SBarry Smith for (jL=i; jL<nrows; jL++) 630*b2f1ab58SBarry Smith { 631*b2f1ab58SBarry Smith j = retval.row_nnz[jL]-1; 632*b2f1ab58SBarry Smith if (j>=0) 633*b2f1ab58SBarry Smith { 634*b2f1ab58SBarry Smith k = retval.icols[jL][j]; 635*b2f1ab58SBarry Smith if (k==i) printf(fmt, jL+1,i+1,retval.values[jL][j]); 636*b2f1ab58SBarry Smith } 637*b2f1ab58SBarry Smith } 638*b2f1ab58SBarry Smith printf("\n\n"); 639*b2f1ab58SBarry Smith } 640*b2f1ab58SBarry Smith #endif 641*b2f1ab58SBarry Smith } 642*b2f1ab58SBarry Smith 643*b2f1ab58SBarry Smith ierr=PetscFree(lvec); CHKERRQ(ierr); ierr=PetscFree(val); CHKERRQ(ierr); 644*b2f1ab58SBarry Smith 645*b2f1ab58SBarry Smith ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, 646*b2f1ab58SBarry Smith &n_alloc_used, max_row_nnz); 647*b2f1ab58SBarry Smith ierr=PetscFree(max_row_nnz); CHKERRQ(ierr); 648*b2f1ab58SBarry Smith 649*b2f1ab58SBarry Smith // Place the inverse of the diagonals in the matrix 650*b2f1ab58SBarry Smith for (i=0; i<nrows; i++) 651*b2f1ab58SBarry Smith { 652*b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 653*b2f1ab58SBarry Smith retval.values[i][r_nnz-1] = 1.0 / diag[i]; 654*b2f1ab58SBarry Smith for (j=0; j<r_nnz-1; j++) 655*b2f1ab58SBarry Smith { 656*b2f1ab58SBarry Smith retval.values[i][j] *= -1; 657*b2f1ab58SBarry Smith } 658*b2f1ab58SBarry Smith } 659*b2f1ab58SBarry Smith ierr=PetscFree(diag); CHKERRQ(ierr); 660*b2f1ab58SBarry Smith 661*b2f1ab58SBarry Smith 662*b2f1ab58SBarry Smith // Return successfully 663*b2f1ab58SBarry Smith *matrix_L = retval; 664*b2f1ab58SBarry Smith 665*b2f1ab58SBarry Smith #ifdef cholesky_debug 666*b2f1ab58SBarry Smith for (i = 0; i<nrows; i++) 667*b2f1ab58SBarry Smith { 668*b2f1ab58SBarry Smith printf("Row %d\n",i+1); 669*b2f1ab58SBarry Smith for (j=0; j<retval.row_nnz[i]; j++) 670*b2f1ab58SBarry Smith { 671*b2f1ab58SBarry Smith printf(fmt, i+1,retval.icols[i][j]+1,retval.values[i][j]); 672*b2f1ab58SBarry Smith } 673*b2f1ab58SBarry Smith } 674*b2f1ab58SBarry Smith #endif 675*b2f1ab58SBarry Smith PetscFunctionReturn(0); 676*b2f1ab58SBarry Smith } 677*b2f1ab58SBarry Smith 678