1b2f1ab58SBarry Smith 2b2f1ab58SBarry Smith /* 3b2f1ab58SBarry Smith spbas_cholesky_row_alloc: 4b2f1ab58SBarry Smith in the data arrays, find a place where another row may be stored. 5b2f1ab58SBarry Smith Return PETSC_ERR_MEM if there is insufficient space to store the 6b2f1ab58SBarry Smith row, so garbage collection and/or re-allocation may be done. 7b2f1ab58SBarry Smith */ 8b2f1ab58SBarry Smith #undef __FUNCT__ 9b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_row_alloc" 10b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used) 11b2f1ab58SBarry Smith { 12b2f1ab58SBarry Smith PetscFunctionBegin; 13b2f1ab58SBarry Smith retval.icols[k] = &retval.alloc_icol[*n_alloc_used]; 14b2f1ab58SBarry Smith retval.values[k] = &retval.alloc_val[*n_alloc_used]; 15b2f1ab58SBarry Smith *n_alloc_used += r_nnz; 169ccc4a9bSBarry Smith if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM); 17b2f1ab58SBarry Smith PetscFunctionReturn(0); 18b2f1ab58SBarry Smith } 19b2f1ab58SBarry Smith 20b2f1ab58SBarry Smith 21b2f1ab58SBarry Smith /* 22b2f1ab58SBarry Smith spbas_cholesky_garbage_collect: 23b2f1ab58SBarry Smith move the rows which have been calculated so far, as well as 24b2f1ab58SBarry Smith those currently under construction, to the front of the 25b2f1ab58SBarry Smith array, while putting them in the proper order. 26b2f1ab58SBarry Smith When it seems necessary, enlarge the current arrays. 27b2f1ab58SBarry Smith 28b2f1ab58SBarry Smith NB: re-allocation is being simulated using 29b2f1ab58SBarry Smith PetscMalloc, memcpy, PetscFree, because 30b2f1ab58SBarry Smith PetscRealloc does not seem to exist. 31b2f1ab58SBarry Smith 32b2f1ab58SBarry Smith */ 33b2f1ab58SBarry Smith #undef __FUNCT__ 34b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_garbage_collect" 35be332245SKarl Rupp PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed. 36*2205254eSKarl Rupp Only the storage, not the contents of this matrix is changed in this function */ 37be332245SKarl Rupp PetscInt i_row, /* I : Number of rows for which the final contents are known */ 38c328eeadSBarry Smith PetscInt *n_row_alloc_ok, /* I/O: Number of rows which are already in their final 39c328eeadSBarry Smith places in the arrays: they need not be moved any more */ 40c328eeadSBarry Smith PetscInt *n_alloc_used, /* I/O: */ 41be332245SKarl Rupp PetscInt *max_row_nnz) /* I : Over-estimate of the number of nonzeros needed to store each row */ 42b2f1ab58SBarry Smith { 43c328eeadSBarry Smith /* PSEUDO-CODE: 44c328eeadSBarry Smith 1. Choose the appropriate size for the arrays 45c328eeadSBarry Smith 2. Rescue the arrays which would be lost during garbage collection 46c328eeadSBarry Smith 3. Reallocate and correct administration 47c328eeadSBarry Smith 4. Move all arrays so that they are in proper order */ 48b2f1ab58SBarry Smith 49b2f1ab58SBarry Smith PetscInt i,j; 50b2f1ab58SBarry Smith PetscInt nrows = result->nrows; 51b2f1ab58SBarry Smith PetscInt n_alloc_ok =0; 52b2f1ab58SBarry Smith PetscInt n_alloc_ok_max=0; 53b2f1ab58SBarry Smith PetscErrorCode ierr; 54b2f1ab58SBarry Smith PetscInt need_already = 0; 55b2f1ab58SBarry Smith PetscInt n_rows_ahead =0; 56b2f1ab58SBarry Smith PetscInt max_need_extra= 0; 57b2f1ab58SBarry Smith PetscInt n_alloc_max, n_alloc_est, n_alloc; 58b2f1ab58SBarry Smith PetscInt n_alloc_now = result->n_alloc_icol; 59b2f1ab58SBarry Smith PetscInt *alloc_icol_old = result->alloc_icol; 60b2f1ab58SBarry Smith PetscScalar *alloc_val_old = result->alloc_val; 61b2f1ab58SBarry Smith PetscInt *icol_rescue; 62b2f1ab58SBarry Smith PetscScalar *val_rescue; 63b2f1ab58SBarry Smith PetscInt n_rescue; 64b2f1ab58SBarry Smith PetscInt n_row_rescue; 65b2f1ab58SBarry Smith PetscInt i_here, i_last, n_copy; 66d36aa34eSBarry Smith const PetscReal xtra_perc = 20; 67b2f1ab58SBarry Smith 68b2f1ab58SBarry Smith PetscFunctionBegin; 69c328eeadSBarry Smith /********************************************************* 70c328eeadSBarry Smith 1. Choose appropriate array size 71c328eeadSBarry Smith Count number of rows and memory usage which is already final */ 729ccc4a9bSBarry Smith for (i=0; i<i_row; i++) { 73b2f1ab58SBarry Smith n_alloc_ok += result->row_nnz[i]; 74b2f1ab58SBarry Smith n_alloc_ok_max += max_row_nnz[i]; 75b2f1ab58SBarry Smith } 76b2f1ab58SBarry Smith 77c328eeadSBarry Smith /* Count currently needed memory usage and future memory requirements 78c328eeadSBarry Smith (max, predicted)*/ 799ccc4a9bSBarry Smith for (i=i_row; i<nrows; i++) { 809ccc4a9bSBarry Smith if (!result->row_nnz[i]) { 81b2f1ab58SBarry Smith max_need_extra += max_row_nnz[i]; 829ccc4a9bSBarry Smith } else { 83b2f1ab58SBarry Smith need_already += max_row_nnz[i]; 84b2f1ab58SBarry Smith n_rows_ahead++; 85b2f1ab58SBarry Smith } 86b2f1ab58SBarry Smith } 87b2f1ab58SBarry Smith 88c328eeadSBarry Smith /* Make maximal and realistic memory requirement estimates */ 89b2f1ab58SBarry Smith n_alloc_max = n_alloc_ok + need_already + max_need_extra; 909ccc4a9bSBarry Smith n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) * ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max)); 91b2f1ab58SBarry Smith 92c328eeadSBarry Smith /* Choose array sizes */ 93*2205254eSKarl Rupp if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max; 94*2205254eSKarl Rupp else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now; 95*2205254eSKarl Rupp else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max; 96*2205254eSKarl Rupp else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); 97b2f1ab58SBarry Smith 98c328eeadSBarry Smith /* If new estimate is less than what we already have, 99c328eeadSBarry Smith don't reallocate, just garbage-collect */ 1009ccc4a9bSBarry Smith if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) { 101b2f1ab58SBarry Smith n_alloc = result->n_alloc_icol; 102b2f1ab58SBarry Smith } 103b2f1ab58SBarry Smith 104c328eeadSBarry Smith /* Motivate dimension choice */ 105c328eeadSBarry Smith ierr = PetscInfo1(PETSC_NULL," Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr); 106*2205254eSKarl Rupp if (n_alloc_max == n_alloc_est) { 107*2205254eSKarl Rupp ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); 108*2205254eSKarl Rupp } else if (n_alloc_now >= n_alloc_est) { 109*2205254eSKarl Rupp ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); 110*2205254eSKarl Rupp } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { 111*2205254eSKarl Rupp ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); 112*2205254eSKarl Rupp } else { 113*2205254eSKarl Rupp ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); 114*2205254eSKarl Rupp } 115b2f1ab58SBarry Smith 116b2f1ab58SBarry Smith 117c328eeadSBarry Smith /********************************************************** 118c328eeadSBarry Smith 2. Rescue arrays which would be lost 119c328eeadSBarry Smith Count how many rows and nonzeros will have to be rescued 120c328eeadSBarry Smith when moving all arrays in place */ 121b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 122*2205254eSKarl Rupp if (*n_row_alloc_ok==0) *n_alloc_used = 0; 1239ccc4a9bSBarry Smith else { 124b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 125*2205254eSKarl Rupp 1269ccc4a9bSBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 127b2f1ab58SBarry Smith } 128b2f1ab58SBarry Smith 1299ccc4a9bSBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) { 130b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 131b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 1329ccc4a9bSBarry Smith if (result->row_nnz[i]>0) { 1339ccc4a9bSBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) { 134b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 135b2f1ab58SBarry Smith n_row_rescue++; 136b2f1ab58SBarry Smith } 137b2f1ab58SBarry Smith 138*2205254eSKarl Rupp if (i<i_row) *n_alloc_used += result->row_nnz[i]; 139*2205254eSKarl Rupp else *n_alloc_used += max_row_nnz[i]; 140b2f1ab58SBarry Smith } 141b2f1ab58SBarry Smith } 142b2f1ab58SBarry Smith 143c328eeadSBarry Smith /* Allocate rescue arrays */ 1449ccc4a9bSBarry Smith ierr = PetscMalloc(n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr); 1459ccc4a9bSBarry Smith ierr = PetscMalloc(n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr); 146b2f1ab58SBarry Smith 147c328eeadSBarry Smith /* Rescue the arrays which need rescuing */ 148b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 149*2205254eSKarl Rupp if (*n_row_alloc_ok==0) *n_alloc_used = 0; 1509ccc4a9bSBarry Smith else { 151b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 1529ccc4a9bSBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 153b2f1ab58SBarry Smith } 154b2f1ab58SBarry Smith 1559ccc4a9bSBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) { 156b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 157b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 1589ccc4a9bSBarry Smith if (result->row_nnz[i]>0) { 1594e1ff37aSBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) { 1609ccc4a9bSBarry Smith ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 1619ccc4a9bSBarry Smith ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 162*2205254eSKarl Rupp 163b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 164b2f1ab58SBarry Smith n_row_rescue++; 165b2f1ab58SBarry Smith } 166b2f1ab58SBarry Smith 167*2205254eSKarl Rupp if (i<i_row) *n_alloc_used += result->row_nnz[i]; 168*2205254eSKarl Rupp else *n_alloc_used += max_row_nnz[i]; 169b2f1ab58SBarry Smith } 170b2f1ab58SBarry Smith } 171b2f1ab58SBarry Smith 172c328eeadSBarry Smith /********************************************************** 173c328eeadSBarry Smith 3. Reallocate and correct administration */ 174b2f1ab58SBarry Smith 1759ccc4a9bSBarry Smith if (n_alloc != result->n_alloc_icol) { 176cc6fc633SBarry Smith n_copy = PetscMin(n_alloc,result->n_alloc_icol); 177b2f1ab58SBarry Smith 178c328eeadSBarry Smith /* PETSC knows no REALLOC, so we'll REALLOC ourselves. 179b2f1ab58SBarry Smith 180c328eeadSBarry Smith Allocate new icol-data, copy old contents */ 1819ccc4a9bSBarry Smith ierr = PetscMalloc(n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr); 1829ccc4a9bSBarry Smith ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr); 183b2f1ab58SBarry Smith 184c328eeadSBarry Smith /* Update administration, Reset pointers to new arrays */ 185b2f1ab58SBarry Smith result->n_alloc_icol = n_alloc; 1869ccc4a9bSBarry Smith for (i=0; i<nrows; i++) { 1879ccc4a9bSBarry Smith result->icols[i] = result->alloc_icol + (result->icols[i] - alloc_icol_old); 1889ccc4a9bSBarry Smith result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 189b2f1ab58SBarry Smith } 190b2f1ab58SBarry Smith 191c328eeadSBarry Smith /* Delete old array */ 192b2f1ab58SBarry Smith ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr); 193b2f1ab58SBarry Smith 194c328eeadSBarry Smith /* Allocate new value-data, copy old contents */ 1959ccc4a9bSBarry Smith ierr = PetscMalloc(n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr); 1969ccc4a9bSBarry Smith ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr); 197b2f1ab58SBarry Smith 198c328eeadSBarry Smith /* Update administration, Reset pointers to new arrays */ 199b2f1ab58SBarry Smith result->n_alloc_val = n_alloc; 2009ccc4a9bSBarry Smith for (i=0; i<nrows; i++) { 2019ccc4a9bSBarry Smith result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol); 202b2f1ab58SBarry Smith } 203b2f1ab58SBarry Smith 204c328eeadSBarry Smith /* Delete old array */ 205b2f1ab58SBarry Smith ierr = PetscFree(alloc_val_old);CHKERRQ(ierr); 206b2f1ab58SBarry Smith } 207b2f1ab58SBarry Smith 208b2f1ab58SBarry Smith 209c328eeadSBarry Smith /********************************************************* 210c328eeadSBarry Smith 4. Copy all the arrays to their proper places */ 211b2f1ab58SBarry Smith n_row_rescue = 0; n_rescue = 0; 212*2205254eSKarl Rupp if (*n_row_alloc_ok==0) *n_alloc_used = 0; 2139ccc4a9bSBarry Smith else { 214b2f1ab58SBarry Smith i = *n_row_alloc_ok - 1; 215*2205254eSKarl Rupp 2169ccc4a9bSBarry Smith *n_alloc_used = (result->icols[i]-result->alloc_icol) + result->row_nnz[i]; 217b2f1ab58SBarry Smith } 218b2f1ab58SBarry Smith 2199ccc4a9bSBarry Smith for (i=*n_row_alloc_ok; i<nrows; i++) { 220b2f1ab58SBarry Smith i_here = result->icols[i]-result->alloc_icol; 221b2f1ab58SBarry Smith i_last = i_here + result->row_nnz[i]; 222b2f1ab58SBarry Smith 223b2f1ab58SBarry Smith result->icols[i] = result->alloc_icol + *n_alloc_used; 224b2f1ab58SBarry Smith result->values[i]= result->alloc_val + *n_alloc_used; 225b2f1ab58SBarry Smith 2269ccc4a9bSBarry Smith if (result->row_nnz[i]>0) { 2279ccc4a9bSBarry Smith if (*n_alloc_used > i_here || i_last > n_alloc) { 2289ccc4a9bSBarry Smith ierr = PetscMemcpy((void*) result->icols[i], (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr); 2299ccc4a9bSBarry Smith ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr); 230*2205254eSKarl Rupp 231b2f1ab58SBarry Smith n_rescue += result->row_nnz[i]; 232b2f1ab58SBarry Smith n_row_rescue++; 2339ccc4a9bSBarry Smith } else { 2349ccc4a9bSBarry Smith for (j=0; j<result->row_nnz[i]; j++) { 235b2f1ab58SBarry Smith result->icols[i][j] = result->alloc_icol[i_here+j]; 236b2f1ab58SBarry Smith result->values[i][j] = result->alloc_val[i_here+j]; 237b2f1ab58SBarry Smith } 238b2f1ab58SBarry Smith } 239*2205254eSKarl Rupp if (i<i_row) *n_alloc_used += result->row_nnz[i]; 240*2205254eSKarl Rupp else *n_alloc_used += max_row_nnz[i]; 241b2f1ab58SBarry Smith } 242b2f1ab58SBarry Smith } 243b2f1ab58SBarry Smith 244c328eeadSBarry Smith /* Delete the rescue arrays */ 245b2f1ab58SBarry Smith ierr = PetscFree(icol_rescue);CHKERRQ(ierr); 246b2f1ab58SBarry Smith ierr = PetscFree(val_rescue);CHKERRQ(ierr); 247*2205254eSKarl Rupp 248b2f1ab58SBarry Smith *n_row_alloc_ok = i_row; 249b2f1ab58SBarry Smith PetscFunctionReturn(0); 250b2f1ab58SBarry Smith } 251b2f1ab58SBarry Smith 252b2f1ab58SBarry Smith /* 253b2f1ab58SBarry Smith spbas_incomplete_cholesky: 254b2f1ab58SBarry Smith incomplete Cholesky decomposition of a square, symmetric, 255b2f1ab58SBarry Smith positive definite matrix. 256b2f1ab58SBarry Smith 257b2f1ab58SBarry Smith In case negative diagonals are encountered, function returns 258b2f1ab58SBarry Smith NEGATIVE_DIAGONAL. When this happens, call this function again 259b2f1ab58SBarry Smith with a larger epsdiag_in, a less sparse pattern, and/or a smaller 260b2f1ab58SBarry Smith droptol 261b2f1ab58SBarry Smith */ 262b2f1ab58SBarry Smith #undef __FUNCT__ 263b2f1ab58SBarry Smith #define __FUNCT__ "spbas_incomplete_cholesky" 2649ccc4a9bSBarry Smith PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L) 265b2f1ab58SBarry Smith { 266b2f1ab58SBarry Smith PetscInt jL; 267b2f1ab58SBarry Smith Mat_SeqAIJ *a =(Mat_SeqAIJ*)A->data; 268b2f1ab58SBarry Smith PetscInt *ai=a->i,*aj=a->j; 269b2f1ab58SBarry Smith MatScalar *aa=a->a; 270b2f1ab58SBarry Smith PetscInt nrows, ncols; 271b2f1ab58SBarry Smith PetscInt *max_row_nnz; 272b2f1ab58SBarry Smith PetscErrorCode ierr; 273b2f1ab58SBarry Smith spbas_matrix retval; 274b2f1ab58SBarry Smith PetscScalar *diag; 275b2f1ab58SBarry Smith PetscScalar *val; 276b2f1ab58SBarry Smith PetscScalar *lvec; 277b2f1ab58SBarry Smith PetscScalar epsdiag; 278b2f1ab58SBarry Smith PetscInt i,j,k; 279ace3abfcSBarry Smith const PetscBool do_values = PETSC_TRUE; 2809ccc4a9bSBarry Smith PetscInt *r1_icol; 2819ccc4a9bSBarry Smith PetscScalar *r1_val; 2829ccc4a9bSBarry Smith PetscInt *r_icol; 2839ccc4a9bSBarry Smith PetscInt r_nnz; 2849ccc4a9bSBarry Smith PetscScalar *r_val; 2859ccc4a9bSBarry Smith PetscInt *A_icol; 2869ccc4a9bSBarry Smith PetscInt A_nnz; 2879ccc4a9bSBarry Smith PetscScalar *A_val; 2889ccc4a9bSBarry Smith PetscInt *p_icol; 2899ccc4a9bSBarry Smith PetscInt p_nnz; 2909ccc4a9bSBarry Smith PetscInt n_row_alloc_ok = 0; /* number of rows which have been stored correctly in the matrix */ 2919ccc4a9bSBarry Smith PetscInt n_alloc_used = 0; /* part of result->icols and result->values which is currently being used */ 292b2f1ab58SBarry Smith 2939ccc4a9bSBarry Smith PetscFunctionBegin; 2944e1ff37aSBarry Smith /* Convert the Manteuffel shift from 'fraction of average diagonal' to dimensioned value */ 295b2f1ab58SBarry Smith ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr); 296b2f1ab58SBarry Smith ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr); 297*2205254eSKarl Rupp 298b2f1ab58SBarry Smith epsdiag *= epsdiag_in / nrows; 299*2205254eSKarl Rupp 3009767453cSBarry Smith ierr = PetscInfo2(PETSC_NULL," Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr); 301b2f1ab58SBarry Smith 302*2205254eSKarl Rupp if (droptol<1e-10) droptol=1e-10; 303b2f1ab58SBarry Smith 304e32f2f54SBarry Smith if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n"); 305b2f1ab58SBarry Smith 306b2f1ab58SBarry Smith retval.nrows = nrows; 307b2f1ab58SBarry Smith retval.ncols = nrows; 308b2f1ab58SBarry Smith retval.nnz = pattern.nnz/10; 309b2f1ab58SBarry Smith retval.col_idx_type = SPBAS_COLUMN_NUMBERS; 310b2f1ab58SBarry Smith retval.block_data = PETSC_TRUE; 311b2f1ab58SBarry Smith 312b2f1ab58SBarry Smith ierr = spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr); 3134e1ff37aSBarry Smith ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 314b2f1ab58SBarry Smith ierr = spbas_allocate_data(&retval);CHKERRQ(ierr); 315b2f1ab58SBarry Smith retval.nnz = 0; 316b2f1ab58SBarry Smith 317b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr); 318b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr); 319b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr); 320b2f1ab58SBarry Smith ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr); 321e32f2f54SBarry Smith if (!(diag && val && lvec && max_row_nnz)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n"); 322b2f1ab58SBarry Smith 3234e1ff37aSBarry Smith ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 3244e1ff37aSBarry Smith ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr); 3254e1ff37aSBarry Smith ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr); 326b2f1ab58SBarry Smith 327c328eeadSBarry Smith /* Check correct format of sparseness pattern */ 328e32f2f54SBarry Smith if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n"); 329b2f1ab58SBarry Smith 330c328eeadSBarry Smith /* Count the nonzeros on transpose of pattern */ 3314e1ff37aSBarry Smith for (i = 0; i<nrows; i++) { 332b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 333b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 3344e1ff37aSBarry Smith for (j=0; j<p_nnz; j++) { 335b2f1ab58SBarry Smith max_row_nnz[i+p_icol[j]]++; 336b2f1ab58SBarry Smith } 337b2f1ab58SBarry Smith } 338b2f1ab58SBarry Smith 339c328eeadSBarry Smith /* Calculate rows of L */ 3404e1ff37aSBarry Smith for (i = 0; i<nrows; i++) { 341b2f1ab58SBarry Smith p_nnz = pattern.row_nnz[i]; 342b2f1ab58SBarry Smith p_icol = pattern.icols[i]; 343b2f1ab58SBarry Smith 344b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 345b2f1ab58SBarry Smith r_icol = retval.icols[i]; 346b2f1ab58SBarry Smith r_val = retval.values[i]; 347b2f1ab58SBarry Smith A_nnz = ai[rip[i]+1] - ai[rip[i]]; 348b2f1ab58SBarry Smith A_icol = &aj[ai[rip[i]]]; 349b2f1ab58SBarry Smith A_val = &aa[ai[rip[i]]]; 350b2f1ab58SBarry Smith 351c328eeadSBarry Smith /* Calculate val += A(i,i:n)'; */ 3524e1ff37aSBarry Smith for (j=0; j<A_nnz; j++) { 353b2f1ab58SBarry Smith k = riip[A_icol[j]]; 354*2205254eSKarl Rupp if (k>=i) val[k] = A_val[j]; 355b2f1ab58SBarry Smith } 356b2f1ab58SBarry Smith 357c328eeadSBarry Smith /* Add regularization */ 358b2f1ab58SBarry Smith val[i] += epsdiag; 359b2f1ab58SBarry Smith 360c328eeadSBarry Smith /* Calculate lvec = diag(D(0:i-1)) * L(0:i-1,i); 361c328eeadSBarry Smith val(i) = A(i,i) - L(0:i-1,i)' * lvec */ 3624e1ff37aSBarry Smith for (j=0; j<r_nnz; j++) { 363b2f1ab58SBarry Smith k = r_icol[j]; 364b2f1ab58SBarry Smith lvec[k] = diag[k] * r_val[j]; 365b2f1ab58SBarry Smith val[i] -= r_val[j] * lvec[k]; 366b2f1ab58SBarry Smith } 367b2f1ab58SBarry Smith 368c328eeadSBarry Smith /* Calculate the new diagonal */ 369b2f1ab58SBarry Smith diag[i] = val[i]; 3704e1ff37aSBarry Smith if (PetscRealPart(diag[i])<droptol) { 371c328eeadSBarry Smith ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n"); 372c328eeadSBarry Smith ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1); 373b2f1ab58SBarry Smith 374c328eeadSBarry Smith /* Delete the whole matrix at once. */ 375b2f1ab58SBarry Smith ierr = spbas_delete(retval); 376b2f1ab58SBarry Smith return NEGATIVE_DIAGONAL; 377b2f1ab58SBarry Smith } 378b2f1ab58SBarry Smith 379c328eeadSBarry Smith /* If necessary, allocate arrays */ 3804e1ff37aSBarry Smith if (r_nnz==0) { 3819ccc4a9bSBarry Smith ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr); 3824e1ff37aSBarry Smith if (ierr == PETSC_ERR_MEM) { 3839ccc4a9bSBarry Smith ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 384b2f1ab58SBarry Smith ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used); 385b2f1ab58SBarry Smith } 386b2f1ab58SBarry Smith r_icol = retval.icols[i]; 387b2f1ab58SBarry Smith r_val = retval.values[i]; 388b2f1ab58SBarry Smith } 389b2f1ab58SBarry Smith 390c328eeadSBarry Smith /* Now, fill in */ 391b2f1ab58SBarry Smith r_icol[r_nnz] = i; 392b2f1ab58SBarry Smith r_val [r_nnz] = 1.0; 393b2f1ab58SBarry Smith r_nnz++; 394b2f1ab58SBarry Smith retval.row_nnz[i]++; 395b2f1ab58SBarry Smith 396b2f1ab58SBarry Smith retval.nnz += r_nnz; 397b2f1ab58SBarry Smith 398c328eeadSBarry Smith /* Calculate 399c328eeadSBarry Smith val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */ 4004e1ff37aSBarry Smith for (j=1; j<p_nnz; j++) { 401b2f1ab58SBarry Smith k = i+p_icol[j]; 402b2f1ab58SBarry Smith r1_icol = retval.icols[k]; 403b2f1ab58SBarry Smith r1_val = retval.values[k]; 4044e1ff37aSBarry Smith for (jL=0; jL<retval.row_nnz[k]; jL++) { 405b2f1ab58SBarry Smith val[k] -= r1_val[jL] * lvec[r1_icol[jL]]; 406b2f1ab58SBarry Smith } 407b2f1ab58SBarry Smith val[k] /= diag[i]; 408b2f1ab58SBarry Smith 4094e1ff37aSBarry Smith if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) { 410c328eeadSBarry Smith /* If necessary, allocate arrays */ 4114e1ff37aSBarry Smith if (retval.row_nnz[k]==0) { 4129ccc4a9bSBarry Smith ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used); 4139ccc4a9bSBarry Smith if (ierr == PETSC_ERR_MEM) { 4149ccc4a9bSBarry Smith ierr = spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 4159ccc4a9bSBarry Smith ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr); 416b2f1ab58SBarry Smith r_icol = retval.icols[i]; 417b2f1ab58SBarry Smith r_val = retval.values[i]; 418b2f1ab58SBarry Smith } 419b2f1ab58SBarry Smith } 420b2f1ab58SBarry Smith 421b2f1ab58SBarry Smith retval.icols[k][retval.row_nnz[k]] = i; 422b2f1ab58SBarry Smith retval.values[k][retval.row_nnz[k]] = val[k]; 423b2f1ab58SBarry Smith retval.row_nnz[k]++; 424b2f1ab58SBarry Smith } 425b2f1ab58SBarry Smith val[k] = 0; 426b2f1ab58SBarry Smith } 427b2f1ab58SBarry Smith 42871d55d18SBarry Smith /* Erase the values used in the work arrays */ 429*2205254eSKarl Rupp for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0; 430b2f1ab58SBarry Smith } 431b2f1ab58SBarry Smith 4329ccc4a9bSBarry Smith ierr=PetscFree(lvec);CHKERRQ(ierr); 4339ccc4a9bSBarry Smith ierr=PetscFree(val);CHKERRQ(ierr); 434b2f1ab58SBarry Smith 4359ccc4a9bSBarry Smith ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr); 436b2f1ab58SBarry Smith ierr = PetscFree(max_row_nnz);CHKERRQ(ierr); 437b2f1ab58SBarry Smith 438c328eeadSBarry Smith /* Place the inverse of the diagonals in the matrix */ 4399ccc4a9bSBarry Smith for (i=0; i<nrows; i++) { 440b2f1ab58SBarry Smith r_nnz = retval.row_nnz[i]; 441*2205254eSKarl Rupp 442b2f1ab58SBarry Smith retval.values[i][r_nnz-1] = 1.0 / diag[i]; 4439ccc4a9bSBarry Smith for (j=0; j<r_nnz-1; j++) { 444b2f1ab58SBarry Smith retval.values[i][j] *= -1; 445b2f1ab58SBarry Smith } 446b2f1ab58SBarry Smith } 447b2f1ab58SBarry Smith ierr = PetscFree(diag);CHKERRQ(ierr); 448b2f1ab58SBarry Smith *matrix_L = retval; 449b2f1ab58SBarry Smith PetscFunctionReturn(0); 450b2f1ab58SBarry Smith } 451b2f1ab58SBarry Smith 452