xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision be33224567306cb44901cfc55a41c9126afd0e6e)
1b2f1ab58SBarry Smith 
2b2f1ab58SBarry Smith /*
3b2f1ab58SBarry Smith    spbas_cholesky_row_alloc:
4b2f1ab58SBarry Smith       in the data arrays, find a place where another row may be stored.
5b2f1ab58SBarry Smith       Return PETSC_ERR_MEM if there is insufficient space to store the
6b2f1ab58SBarry Smith       row, so garbage collection and/or re-allocation may be done.
7b2f1ab58SBarry Smith */
8b2f1ab58SBarry Smith #undef __FUNCT__
9b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_row_alloc"
10b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11b2f1ab58SBarry Smith {
12b2f1ab58SBarry Smith   PetscFunctionBegin;
13b2f1ab58SBarry Smith   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
14b2f1ab58SBarry Smith   retval.values[k] = &retval.alloc_val[*n_alloc_used];
15b2f1ab58SBarry Smith   *n_alloc_used   += r_nnz;
169ccc4a9bSBarry Smith   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
17b2f1ab58SBarry Smith   PetscFunctionReturn(0);
18b2f1ab58SBarry Smith }
19b2f1ab58SBarry Smith 
20b2f1ab58SBarry Smith 
21b2f1ab58SBarry Smith /*
22b2f1ab58SBarry Smith   spbas_cholesky_garbage_collect:
23b2f1ab58SBarry Smith      move the rows which have been calculated so far, as well as
24b2f1ab58SBarry Smith      those currently under construction, to the front of the
25b2f1ab58SBarry Smith      array, while putting them in the proper order.
26b2f1ab58SBarry Smith      When it seems necessary, enlarge the current arrays.
27b2f1ab58SBarry Smith 
28b2f1ab58SBarry Smith      NB: re-allocation is being simulated using
29b2f1ab58SBarry Smith          PetscMalloc, memcpy, PetscFree, because
30b2f1ab58SBarry Smith          PetscRealloc does not seem to exist.
31b2f1ab58SBarry Smith 
32b2f1ab58SBarry Smith */
33b2f1ab58SBarry Smith #undef __FUNCT__
34b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_garbage_collect"
35*be332245SKarl Rupp PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
36c328eeadSBarry Smith                                                                                     Only the storage, not the contents of this matrix
37c328eeadSBarry Smith                                                                                     is changed in this function */
38*be332245SKarl Rupp                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
39c328eeadSBarry Smith                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
40c328eeadSBarry Smith                                                                                     places in the arrays: they need not be moved any more */
41c328eeadSBarry Smith                                               PetscInt     *n_alloc_used,   /* I/O:  */
42*be332245SKarl Rupp                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
43b2f1ab58SBarry Smith {
44c328eeadSBarry Smith /* PSEUDO-CODE:
45c328eeadSBarry Smith   1. Choose the appropriate size for the arrays
46c328eeadSBarry Smith   2. Rescue the arrays which would be lost during garbage collection
47c328eeadSBarry Smith   3. Reallocate and correct administration
48c328eeadSBarry Smith   4. Move all arrays so that they are in proper order */
49b2f1ab58SBarry Smith 
50b2f1ab58SBarry Smith   PetscInt        i,j;
51b2f1ab58SBarry Smith   PetscInt        nrows = result->nrows;
52b2f1ab58SBarry Smith   PetscInt        n_alloc_ok=0;
53b2f1ab58SBarry Smith   PetscInt        n_alloc_ok_max=0;
54b2f1ab58SBarry Smith   PetscErrorCode  ierr;
55b2f1ab58SBarry Smith   PetscInt        need_already= 0;
56b2f1ab58SBarry Smith   PetscInt        n_rows_ahead=0;
57b2f1ab58SBarry Smith   PetscInt        max_need_extra= 0;
58b2f1ab58SBarry Smith   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
59b2f1ab58SBarry Smith   PetscInt        n_alloc_now = result->n_alloc_icol;
60b2f1ab58SBarry Smith   PetscInt        *alloc_icol_old = result->alloc_icol;
61b2f1ab58SBarry Smith   PetscScalar     *alloc_val_old  = result->alloc_val;
62b2f1ab58SBarry Smith   PetscInt        *icol_rescue;
63b2f1ab58SBarry Smith   PetscScalar     *val_rescue;
64b2f1ab58SBarry Smith   PetscInt        n_rescue;
65b2f1ab58SBarry Smith   PetscInt        n_row_rescue;
66b2f1ab58SBarry Smith   PetscInt        i_here, i_last, n_copy;
67d36aa34eSBarry Smith   const PetscReal xtra_perc = 20;
68b2f1ab58SBarry Smith 
69b2f1ab58SBarry Smith   PetscFunctionBegin;
70c328eeadSBarry Smith   /*********************************************************
71c328eeadSBarry Smith   1. Choose appropriate array size
72c328eeadSBarry Smith   Count number of rows and memory usage which is already final */
739ccc4a9bSBarry Smith   for (i=0;i<i_row; i++)  {
74b2f1ab58SBarry Smith     n_alloc_ok += result->row_nnz[i];
75b2f1ab58SBarry Smith     n_alloc_ok_max += max_row_nnz[i];
76b2f1ab58SBarry Smith   }
77b2f1ab58SBarry Smith 
78c328eeadSBarry Smith   /* Count currently needed memory usage and future memory requirements
79c328eeadSBarry Smith     (max, predicted)*/
809ccc4a9bSBarry Smith   for (i=i_row; i<nrows; i++) {
819ccc4a9bSBarry Smith     if (!result->row_nnz[i]) {
82b2f1ab58SBarry Smith       max_need_extra  += max_row_nnz[i];
839ccc4a9bSBarry Smith     } else {
84b2f1ab58SBarry Smith       need_already    += max_row_nnz[i];
85b2f1ab58SBarry Smith       n_rows_ahead++;
86b2f1ab58SBarry Smith     }
87b2f1ab58SBarry Smith   }
88b2f1ab58SBarry Smith 
89c328eeadSBarry Smith   /* Make maximal and realistic memory requirement estimates */
90b2f1ab58SBarry Smith   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
919ccc4a9bSBarry Smith   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
92b2f1ab58SBarry Smith 
93c328eeadSBarry Smith   /* Choose array sizes */
949ccc4a9bSBarry Smith   if (n_alloc_max == n_alloc_est)  { n_alloc = n_alloc_max; }
959ccc4a9bSBarry Smith   else if (n_alloc_now >= n_alloc_est)  { n_alloc = n_alloc_now; }
969ccc4a9bSBarry Smith   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))  { n_alloc  = n_alloc_max;  }
979ccc4a9bSBarry Smith   else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }
98b2f1ab58SBarry Smith 
99c328eeadSBarry Smith   /* If new estimate is less than what we already have,
100c328eeadSBarry Smith     don't reallocate, just garbage-collect */
1019ccc4a9bSBarry Smith   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
102b2f1ab58SBarry Smith     n_alloc = result->n_alloc_icol;
103b2f1ab58SBarry Smith   }
104b2f1ab58SBarry Smith 
105c328eeadSBarry Smith   /* Motivate dimension choice */
106c328eeadSBarry Smith   ierr = PetscInfo1(PETSC_NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
1079ccc4a9bSBarry Smith   if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); }
1089ccc4a9bSBarry Smith   else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); }
1099ccc4a9bSBarry Smith   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); }
1109ccc4a9bSBarry Smith   else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); }
111b2f1ab58SBarry Smith 
112b2f1ab58SBarry Smith 
113c328eeadSBarry Smith   /**********************************************************
114c328eeadSBarry Smith   2. Rescue arrays which would be lost
115c328eeadSBarry Smith   Count how many rows and nonzeros will have to be rescued
116c328eeadSBarry Smith   when moving all arrays in place */
117b2f1ab58SBarry Smith   n_row_rescue = 0; n_rescue = 0;
118b2f1ab58SBarry Smith   if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
1199ccc4a9bSBarry Smith   else  {
120b2f1ab58SBarry Smith     i = *n_row_alloc_ok - 1;
1219ccc4a9bSBarry Smith     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
122b2f1ab58SBarry Smith   }
123b2f1ab58SBarry Smith 
1249ccc4a9bSBarry Smith   for (i=*n_row_alloc_ok; i<nrows; i++) {
125b2f1ab58SBarry Smith     i_here = result->icols[i]-result->alloc_icol;
126b2f1ab58SBarry Smith     i_last = i_here + result->row_nnz[i];
1279ccc4a9bSBarry Smith     if (result->row_nnz[i]>0) {
1289ccc4a9bSBarry Smith       if (*n_alloc_used > i_here || i_last > n_alloc) {
129b2f1ab58SBarry Smith         n_rescue += result->row_nnz[i];
130b2f1ab58SBarry Smith         n_row_rescue++;
131b2f1ab58SBarry Smith       }
132b2f1ab58SBarry Smith 
133b2f1ab58SBarry Smith       if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
134b2f1ab58SBarry Smith       else         {  *n_alloc_used += max_row_nnz[i];}
135b2f1ab58SBarry Smith     }
136b2f1ab58SBarry Smith   }
137b2f1ab58SBarry Smith 
138c328eeadSBarry Smith   /* Allocate rescue arrays */
1399ccc4a9bSBarry Smith   ierr = PetscMalloc(n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr);
1409ccc4a9bSBarry Smith   ierr = PetscMalloc(n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr);
141b2f1ab58SBarry Smith 
142c328eeadSBarry Smith   /* Rescue the arrays which need rescuing */
143b2f1ab58SBarry Smith   n_row_rescue = 0; n_rescue = 0;
144b2f1ab58SBarry Smith   if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
1459ccc4a9bSBarry Smith   else {
146b2f1ab58SBarry Smith     i = *n_row_alloc_ok - 1;
1479ccc4a9bSBarry Smith     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
148b2f1ab58SBarry Smith   }
149b2f1ab58SBarry Smith 
1509ccc4a9bSBarry Smith   for (i=*n_row_alloc_ok; i<nrows; i++) {
151b2f1ab58SBarry Smith     i_here = result->icols[i]-result->alloc_icol;
152b2f1ab58SBarry Smith     i_last = i_here + result->row_nnz[i];
1539ccc4a9bSBarry Smith     if (result->row_nnz[i]>0) {
1544e1ff37aSBarry Smith       if (*n_alloc_used > i_here || i_last > n_alloc) {
1559ccc4a9bSBarry Smith         ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
1569ccc4a9bSBarry Smith         ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
157b2f1ab58SBarry Smith         n_rescue += result->row_nnz[i];
158b2f1ab58SBarry Smith         n_row_rescue++;
159b2f1ab58SBarry Smith       }
160b2f1ab58SBarry Smith 
161b2f1ab58SBarry Smith       if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
162b2f1ab58SBarry Smith       else         {  *n_alloc_used += max_row_nnz[i];}
163b2f1ab58SBarry Smith     }
164b2f1ab58SBarry Smith   }
165b2f1ab58SBarry Smith 
166c328eeadSBarry Smith   /**********************************************************
167c328eeadSBarry Smith   3. Reallocate and correct administration */
168b2f1ab58SBarry Smith 
1699ccc4a9bSBarry Smith   if (n_alloc != result->n_alloc_icol)  {
170cc6fc633SBarry Smith     n_copy = PetscMin(n_alloc,result->n_alloc_icol);
171b2f1ab58SBarry Smith 
172c328eeadSBarry Smith     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
173b2f1ab58SBarry Smith 
174c328eeadSBarry Smith         Allocate new icol-data, copy old contents */
1759ccc4a9bSBarry Smith     ierr = PetscMalloc(n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr);
1769ccc4a9bSBarry Smith     ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr);
177b2f1ab58SBarry Smith 
178c328eeadSBarry Smith     /* Update administration, Reset pointers to new arrays  */
179b2f1ab58SBarry Smith     result->n_alloc_icol = n_alloc;
1809ccc4a9bSBarry Smith     for (i=0; i<nrows; i++) {
1819ccc4a9bSBarry Smith       result->icols[i] =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
1829ccc4a9bSBarry Smith       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
183b2f1ab58SBarry Smith     }
184b2f1ab58SBarry Smith 
185c328eeadSBarry Smith     /* Delete old array */
186b2f1ab58SBarry Smith     ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
187b2f1ab58SBarry Smith 
188c328eeadSBarry Smith     /* Allocate new value-data, copy old contents */
1899ccc4a9bSBarry Smith     ierr = PetscMalloc(n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
1909ccc4a9bSBarry Smith     ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr);
191b2f1ab58SBarry Smith 
192c328eeadSBarry Smith     /* Update administration, Reset pointers to new arrays  */
193b2f1ab58SBarry Smith     result->n_alloc_val  = n_alloc;
1949ccc4a9bSBarry Smith     for (i=0; i<nrows; i++) {
1959ccc4a9bSBarry Smith       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
196b2f1ab58SBarry Smith     }
197b2f1ab58SBarry Smith 
198c328eeadSBarry Smith     /* Delete old array */
199b2f1ab58SBarry Smith     ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
200b2f1ab58SBarry Smith   }
201b2f1ab58SBarry Smith 
202b2f1ab58SBarry Smith 
203c328eeadSBarry Smith   /*********************************************************
204c328eeadSBarry Smith   4. Copy all the arrays to their proper places */
205b2f1ab58SBarry Smith   n_row_rescue = 0; n_rescue = 0;
206b2f1ab58SBarry Smith   if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
2079ccc4a9bSBarry Smith   else {
208b2f1ab58SBarry Smith     i = *n_row_alloc_ok - 1;
2099ccc4a9bSBarry Smith     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
210b2f1ab58SBarry Smith   }
211b2f1ab58SBarry Smith 
2129ccc4a9bSBarry Smith   for (i=*n_row_alloc_ok; i<nrows; i++) {
213b2f1ab58SBarry Smith     i_here = result->icols[i]-result->alloc_icol;
214b2f1ab58SBarry Smith     i_last = i_here + result->row_nnz[i];
215b2f1ab58SBarry Smith 
216b2f1ab58SBarry Smith     result->icols[i] = result->alloc_icol + *n_alloc_used;
217b2f1ab58SBarry Smith     result->values[i]= result->alloc_val  + *n_alloc_used;
218b2f1ab58SBarry Smith 
2199ccc4a9bSBarry Smith     if (result->row_nnz[i]>0) {
2209ccc4a9bSBarry Smith       if (*n_alloc_used > i_here || i_last > n_alloc)  {
2219ccc4a9bSBarry Smith         ierr = PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
2229ccc4a9bSBarry Smith         ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
223b2f1ab58SBarry Smith         n_rescue += result->row_nnz[i];
224b2f1ab58SBarry Smith         n_row_rescue++;
2259ccc4a9bSBarry Smith       } else {
2269ccc4a9bSBarry Smith         for (j=0; j<result->row_nnz[i]; j++) {
227b2f1ab58SBarry Smith             result->icols[i][j]   = result->alloc_icol[i_here+j];
228b2f1ab58SBarry Smith             result->values[i][j]  = result->alloc_val[ i_here+j];
229b2f1ab58SBarry Smith         }
230b2f1ab58SBarry Smith       }
231b2f1ab58SBarry Smith       if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
232b2f1ab58SBarry Smith       else         {  *n_alloc_used += max_row_nnz[i];}
233b2f1ab58SBarry Smith     }
234b2f1ab58SBarry Smith   }
235b2f1ab58SBarry Smith 
236c328eeadSBarry Smith   /* Delete the rescue arrays */
237b2f1ab58SBarry Smith   ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
238b2f1ab58SBarry Smith   ierr = PetscFree(val_rescue);CHKERRQ(ierr);
239b2f1ab58SBarry Smith   *n_row_alloc_ok = i_row;
240b2f1ab58SBarry Smith   PetscFunctionReturn(0);
241b2f1ab58SBarry Smith }
242b2f1ab58SBarry Smith 
243b2f1ab58SBarry Smith /*
244b2f1ab58SBarry Smith   spbas_incomplete_cholesky:
245b2f1ab58SBarry Smith      incomplete Cholesky decomposition of a square, symmetric,
246b2f1ab58SBarry Smith      positive definite matrix.
247b2f1ab58SBarry Smith 
248b2f1ab58SBarry Smith      In case negative diagonals are encountered, function returns
249b2f1ab58SBarry Smith      NEGATIVE_DIAGONAL. When this happens, call this function again
250b2f1ab58SBarry Smith      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
251b2f1ab58SBarry Smith      droptol
252b2f1ab58SBarry Smith */
253b2f1ab58SBarry Smith #undef __FUNCT__
254b2f1ab58SBarry Smith #define __FUNCT__ "spbas_incomplete_cholesky"
2559ccc4a9bSBarry Smith PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
256b2f1ab58SBarry Smith {
257b2f1ab58SBarry Smith   PetscInt         jL;
258b2f1ab58SBarry Smith   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data;
259b2f1ab58SBarry Smith   PetscInt         *ai=a->i,*aj=a->j;
260b2f1ab58SBarry Smith   MatScalar        *aa=a->a;
261b2f1ab58SBarry Smith   PetscInt         nrows, ncols;
262b2f1ab58SBarry Smith   PetscInt         *max_row_nnz;
263b2f1ab58SBarry Smith   PetscErrorCode   ierr;
264b2f1ab58SBarry Smith   spbas_matrix     retval;
265b2f1ab58SBarry Smith   PetscScalar      * diag;
266b2f1ab58SBarry Smith   PetscScalar      * val;
267b2f1ab58SBarry Smith   PetscScalar      * lvec;
268b2f1ab58SBarry Smith   PetscScalar      epsdiag;
269b2f1ab58SBarry Smith   PetscInt         i,j,k;
270ace3abfcSBarry Smith   const PetscBool  do_values = PETSC_TRUE;
2719ccc4a9bSBarry Smith   PetscInt         * r1_icol;
2729ccc4a9bSBarry Smith   PetscScalar      *r1_val;
2739ccc4a9bSBarry Smith   PetscInt         * r_icol;
2749ccc4a9bSBarry Smith   PetscInt         r_nnz;
2759ccc4a9bSBarry Smith   PetscScalar      *r_val;
2769ccc4a9bSBarry Smith   PetscInt         * A_icol;
2779ccc4a9bSBarry Smith   PetscInt         A_nnz;
2789ccc4a9bSBarry Smith   PetscScalar      *A_val;
2799ccc4a9bSBarry Smith   PetscInt         * p_icol;
2809ccc4a9bSBarry Smith   PetscInt         p_nnz;
2819ccc4a9bSBarry Smith   PetscInt         n_row_alloc_ok = 0;  /* number of rows which have been stored   correctly in the matrix */
2829ccc4a9bSBarry Smith   PetscInt         n_alloc_used = 0;    /* part of result->icols and result->values   which is currently being used */
283b2f1ab58SBarry Smith 
2849ccc4a9bSBarry Smith   PetscFunctionBegin;
2854e1ff37aSBarry Smith   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
286b2f1ab58SBarry Smith   ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
287b2f1ab58SBarry Smith   ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
288b2f1ab58SBarry Smith   epsdiag *= epsdiag_in / nrows;
2899767453cSBarry Smith   ierr = PetscInfo2(PETSC_NULL,"   Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr);
290b2f1ab58SBarry Smith 
291b2f1ab58SBarry Smith   if (droptol<1e-10) {droptol=1e-10;}
292b2f1ab58SBarry Smith 
293e32f2f54SBarry Smith   if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
294b2f1ab58SBarry Smith 
295b2f1ab58SBarry Smith   retval.nrows = nrows;
296b2f1ab58SBarry Smith   retval.ncols = nrows;
297b2f1ab58SBarry Smith   retval.nnz   = pattern.nnz/10;
298b2f1ab58SBarry Smith   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
299b2f1ab58SBarry Smith   retval.block_data = PETSC_TRUE;
300b2f1ab58SBarry Smith 
301b2f1ab58SBarry Smith   ierr =  spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
3024e1ff37aSBarry Smith   ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
303b2f1ab58SBarry Smith   ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
304b2f1ab58SBarry Smith   retval.nnz   = 0;
305b2f1ab58SBarry Smith 
306b2f1ab58SBarry Smith   ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr);
307b2f1ab58SBarry Smith   ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr);
308b2f1ab58SBarry Smith   ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr);
309b2f1ab58SBarry Smith   ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr);
310e32f2f54SBarry Smith   if (!(diag && val && lvec && max_row_nnz))  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n");
311b2f1ab58SBarry Smith 
3124e1ff37aSBarry Smith   ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
3134e1ff37aSBarry Smith   ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
3144e1ff37aSBarry Smith   ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
315b2f1ab58SBarry Smith 
316c328eeadSBarry Smith   /* Check correct format of sparseness pattern */
317e32f2f54SBarry Smith   if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS)  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
318b2f1ab58SBarry Smith 
319c328eeadSBarry Smith   /* Count the nonzeros on transpose of pattern */
3204e1ff37aSBarry Smith   for (i = 0; i<nrows; i++)  {
321b2f1ab58SBarry Smith     p_nnz  = pattern.row_nnz[i];
322b2f1ab58SBarry Smith     p_icol = pattern.icols[i];
3234e1ff37aSBarry Smith     for (j=0; j<p_nnz; j++)  {
324b2f1ab58SBarry Smith         max_row_nnz[i+p_icol[j]]++;
325b2f1ab58SBarry Smith     }
326b2f1ab58SBarry Smith   }
327b2f1ab58SBarry Smith 
328c328eeadSBarry Smith   /* Calculate rows of L */
3294e1ff37aSBarry Smith   for (i = 0; i<nrows; i++)  {
330b2f1ab58SBarry Smith     p_nnz  = pattern.row_nnz[i];
331b2f1ab58SBarry Smith     p_icol = pattern.icols[i];
332b2f1ab58SBarry Smith 
333b2f1ab58SBarry Smith     r_nnz  = retval.row_nnz[i];
334b2f1ab58SBarry Smith     r_icol = retval.icols[i];
335b2f1ab58SBarry Smith     r_val  = retval.values[i];
336b2f1ab58SBarry Smith     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
337b2f1ab58SBarry Smith     A_icol = &aj[ai[rip[i]]];
338b2f1ab58SBarry Smith     A_val  = &aa[ai[rip[i]]];
339b2f1ab58SBarry Smith 
340c328eeadSBarry Smith     /* Calculate  val += A(i,i:n)'; */
3414e1ff37aSBarry Smith     for (j=0; j<A_nnz; j++) {
342b2f1ab58SBarry Smith       k = riip[A_icol[j]];
343b2f1ab58SBarry Smith       if (k>=i) { val[k] = A_val[j]; }
344b2f1ab58SBarry Smith     }
345b2f1ab58SBarry Smith 
346c328eeadSBarry Smith     /*  Add regularization */
347b2f1ab58SBarry Smith     val[i] += epsdiag;
348b2f1ab58SBarry Smith 
349c328eeadSBarry Smith     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
350c328eeadSBarry Smith         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
3514e1ff37aSBarry Smith     for (j=0; j<r_nnz; j++)  {
352b2f1ab58SBarry Smith       k           = r_icol[j];
353b2f1ab58SBarry Smith       lvec[k]     = diag[k] * r_val[j];
354b2f1ab58SBarry Smith       val[i]     -= r_val[j] * lvec[k];
355b2f1ab58SBarry Smith     }
356b2f1ab58SBarry Smith 
357c328eeadSBarry Smith     /* Calculate the new diagonal */
358b2f1ab58SBarry Smith     diag[i] = val[i];
3594e1ff37aSBarry Smith     if (PetscRealPart(diag[i])<droptol)  {
360c328eeadSBarry Smith       ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n");
361c328eeadSBarry Smith       ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1);
362b2f1ab58SBarry Smith 
363c328eeadSBarry Smith       /* Delete the whole matrix at once. */
364b2f1ab58SBarry Smith       ierr = spbas_delete(retval);
365b2f1ab58SBarry Smith       return NEGATIVE_DIAGONAL;
366b2f1ab58SBarry Smith     }
367b2f1ab58SBarry Smith 
368c328eeadSBarry Smith     /* If necessary, allocate arrays */
3694e1ff37aSBarry Smith     if (r_nnz==0) {
3709ccc4a9bSBarry Smith       ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr);
3714e1ff37aSBarry Smith       if (ierr == PETSC_ERR_MEM) {
3729ccc4a9bSBarry Smith         ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
373b2f1ab58SBarry Smith         ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
374b2f1ab58SBarry Smith       }
375b2f1ab58SBarry Smith       r_icol = retval.icols[i];
376b2f1ab58SBarry Smith       r_val  = retval.values[i];
377b2f1ab58SBarry Smith     }
378b2f1ab58SBarry Smith 
379c328eeadSBarry Smith     /* Now, fill in */
380b2f1ab58SBarry Smith     r_icol[r_nnz] = i;
381b2f1ab58SBarry Smith     r_val [r_nnz] = 1.0;
382b2f1ab58SBarry Smith     r_nnz++;
383b2f1ab58SBarry Smith     retval.row_nnz[i]++;
384b2f1ab58SBarry Smith 
385b2f1ab58SBarry Smith     retval.nnz += r_nnz;
386b2f1ab58SBarry Smith 
387c328eeadSBarry Smith     /* Calculate
388c328eeadSBarry Smith         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
3894e1ff37aSBarry Smith     for (j=1; j<p_nnz; j++)  {
390b2f1ab58SBarry Smith       k = i+p_icol[j];
391b2f1ab58SBarry Smith       r1_icol = retval.icols[k];
392b2f1ab58SBarry Smith       r1_val  = retval.values[k];
3934e1ff37aSBarry Smith       for (jL=0; jL<retval.row_nnz[k]; jL++)   {
394b2f1ab58SBarry Smith         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
395b2f1ab58SBarry Smith       }
396b2f1ab58SBarry Smith       val[k] /= diag[i];
397b2f1ab58SBarry Smith 
3984e1ff37aSBarry Smith       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
399c328eeadSBarry Smith         /* If necessary, allocate arrays */
4004e1ff37aSBarry Smith         if (retval.row_nnz[k]==0) {
4019ccc4a9bSBarry Smith           ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
4029ccc4a9bSBarry Smith           if (ierr == PETSC_ERR_MEM) {
4039ccc4a9bSBarry Smith             ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
4049ccc4a9bSBarry Smith             ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr);
405b2f1ab58SBarry Smith             r_icol = retval.icols[i];
406b2f1ab58SBarry Smith             r_val  = retval.values[i];
407b2f1ab58SBarry Smith           }
408b2f1ab58SBarry Smith         }
409b2f1ab58SBarry Smith 
410b2f1ab58SBarry Smith         retval.icols[k][retval.row_nnz[k]] = i;
411b2f1ab58SBarry Smith         retval.values[k][retval.row_nnz[k]] = val[k];
412b2f1ab58SBarry Smith         retval.row_nnz[k]++;
413b2f1ab58SBarry Smith       }
414b2f1ab58SBarry Smith       val[k] = 0;
415b2f1ab58SBarry Smith     }
416b2f1ab58SBarry Smith 
41771d55d18SBarry Smith     /* Erase the values used in the work arrays */
418b2f1ab58SBarry Smith     for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
419b2f1ab58SBarry Smith   }
420b2f1ab58SBarry Smith 
4219ccc4a9bSBarry Smith   ierr=PetscFree(lvec);CHKERRQ(ierr);
4229ccc4a9bSBarry Smith   ierr=PetscFree(val);CHKERRQ(ierr);
423b2f1ab58SBarry Smith 
4249ccc4a9bSBarry Smith   ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
425b2f1ab58SBarry Smith   ierr=PetscFree(max_row_nnz);CHKERRQ(ierr);
426b2f1ab58SBarry Smith 
427c328eeadSBarry Smith   /* Place the inverse of the diagonals in the matrix */
4289ccc4a9bSBarry Smith   for (i=0; i<nrows; i++) {
429b2f1ab58SBarry Smith     r_nnz = retval.row_nnz[i];
430b2f1ab58SBarry Smith     retval.values[i][r_nnz-1] = 1.0 / diag[i];
4319ccc4a9bSBarry Smith     for (j=0; j<r_nnz-1; j++) {
432b2f1ab58SBarry Smith         retval.values[i][j] *= -1;
433b2f1ab58SBarry Smith     }
434b2f1ab58SBarry Smith   }
435b2f1ab58SBarry Smith   ierr=PetscFree(diag);CHKERRQ(ierr);
436b2f1ab58SBarry Smith   *matrix_L = retval;
437b2f1ab58SBarry Smith   PetscFunctionReturn(0);
438b2f1ab58SBarry Smith }
439b2f1ab58SBarry Smith 
440