xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 71d55d18a6cc105fecd21255dfee54cec910a2af)
1b2f1ab58SBarry Smith 
2b2f1ab58SBarry Smith /*
3b2f1ab58SBarry Smith    spbas_cholesky_row_alloc:
4b2f1ab58SBarry Smith       in the data arrays, find a place where another row may be stored.
5b2f1ab58SBarry Smith       Return PETSC_ERR_MEM if there is insufficient space to store the
6b2f1ab58SBarry Smith       row, so garbage collection and/or re-allocation may be done.
7b2f1ab58SBarry Smith */
8b2f1ab58SBarry Smith #undef __FUNCT__
9b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_row_alloc"
10b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11b2f1ab58SBarry Smith {
12b2f1ab58SBarry Smith    PetscFunctionBegin;
13b2f1ab58SBarry Smith    retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
14b2f1ab58SBarry Smith    retval.values[k] = &retval.alloc_val[*n_alloc_used];
15b2f1ab58SBarry Smith    *n_alloc_used   += r_nnz;
16b2f1ab58SBarry Smith    if (*n_alloc_used > retval.n_alloc_icol)
17b2f1ab58SBarry Smith    {
18b2f1ab58SBarry Smith       PetscFunctionReturn(PETSC_ERR_MEM);
19b2f1ab58SBarry Smith    }
20b2f1ab58SBarry Smith    else
21b2f1ab58SBarry Smith    {
22b2f1ab58SBarry Smith       PetscFunctionReturn(0);
23b2f1ab58SBarry Smith    }
24b2f1ab58SBarry Smith }
25b2f1ab58SBarry Smith 
26b2f1ab58SBarry Smith 
27b2f1ab58SBarry Smith /*
28b2f1ab58SBarry Smith   spbas_cholesky_garbage_collect:
29b2f1ab58SBarry Smith      move the rows which have been calculated so far, as well as
30b2f1ab58SBarry Smith      those currently under construction, to the front of the
31b2f1ab58SBarry Smith      array, while putting them in the proper order.
32b2f1ab58SBarry Smith      When it seems necessary, enlarge the current arrays.
33b2f1ab58SBarry Smith 
34b2f1ab58SBarry Smith      NB: re-allocation is being simulated using
35b2f1ab58SBarry Smith          PetscMalloc, memcpy, PetscFree, because
36b2f1ab58SBarry Smith          PetscRealloc does not seem to exist.
37b2f1ab58SBarry Smith 
38b2f1ab58SBarry Smith */
39b2f1ab58SBarry Smith #undef __FUNCT__
40b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_garbage_collect"
41b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_garbage_collect(
42c328eeadSBarry Smith       spbas_matrix * result, /* I/O: the Cholesky factor matrix being constructed.
43c328eeadSBarry Smith                                    Only the storage, not the contents of this matrix
44c328eeadSBarry Smith                                    is changed in this function */
45c328eeadSBarry Smith       PetscInt i_row,             /* I  : Number of rows for which the final contents are
46c328eeadSBarry Smith 				     known */
47c328eeadSBarry Smith       PetscInt *n_row_alloc_ok,   /* I/O: Number of rows which are already in their final
48c328eeadSBarry Smith 				     places in the arrays: they need not be moved any more */
49c328eeadSBarry Smith       PetscInt *n_alloc_used,     /* I/O:  */
50c328eeadSBarry Smith       PetscInt *max_row_nnz       /*I  : Over-estimate of the number of nonzeros needed to
51c328eeadSBarry Smith 				    store each row */
52b2f1ab58SBarry Smith      )
53b2f1ab58SBarry Smith {
54b2f1ab58SBarry Smith 
55b2f1ab58SBarry Smith 
56c328eeadSBarry Smith   /* PSEUDO-CODE:
57c328eeadSBarry Smith     1. Choose the appropriate size for the arrays
58c328eeadSBarry Smith     2. Rescue the arrays which would be lost during garbage collection
59c328eeadSBarry Smith     3. Reallocate and correct administration
60c328eeadSBarry Smith     4. Move all arrays so that they are in proper order */
61b2f1ab58SBarry Smith 
62b2f1ab58SBarry Smith    PetscInt i,j;
63b2f1ab58SBarry Smith    PetscInt nrows = result->nrows;
64b2f1ab58SBarry Smith    PetscInt n_alloc_ok=0;
65b2f1ab58SBarry Smith    PetscInt n_alloc_ok_max=0;
66b2f1ab58SBarry Smith 
67b2f1ab58SBarry Smith    PetscErrorCode ierr;
68b2f1ab58SBarry Smith    PetscInt need_already= 0;
69b2f1ab58SBarry Smith    PetscInt n_rows_ahead=0;
70b2f1ab58SBarry Smith    PetscInt max_need_extra= 0;
71b2f1ab58SBarry Smith    PetscInt n_alloc_max, n_alloc_est, n_alloc;
72b2f1ab58SBarry Smith    PetscInt n_alloc_now = result->n_alloc_icol;
73b2f1ab58SBarry Smith    PetscInt *alloc_icol_old = result->alloc_icol;
74b2f1ab58SBarry Smith    PetscScalar *alloc_val_old  = result->alloc_val;
75b2f1ab58SBarry Smith    PetscInt *icol_rescue;
76b2f1ab58SBarry Smith    PetscScalar *val_rescue;
77b2f1ab58SBarry Smith    PetscInt n_rescue;
78b2f1ab58SBarry Smith    PetscInt n_row_rescue;
79b2f1ab58SBarry Smith    PetscInt i_here, i_last, n_copy;
80d36aa34eSBarry Smith    const PetscReal xtra_perc = 20;
81b2f1ab58SBarry Smith 
82b2f1ab58SBarry Smith    PetscFunctionBegin;
83b2f1ab58SBarry Smith 
84c328eeadSBarry Smith    /*********************************************************
85c328eeadSBarry Smith     1. Choose appropriate array size
86c328eeadSBarry Smith     Count number of rows and memory usage which is already final */
87b2f1ab58SBarry Smith    for (i=0;i<i_row; i++)
88b2f1ab58SBarry Smith    {
89b2f1ab58SBarry Smith       n_alloc_ok += result->row_nnz[i];
90b2f1ab58SBarry Smith       n_alloc_ok_max += max_row_nnz[i];
91b2f1ab58SBarry Smith    }
92b2f1ab58SBarry Smith 
93c328eeadSBarry Smith    /* Count currently needed memory usage and future memory requirements
94c328eeadSBarry Smith       (max, predicted)*/
95b2f1ab58SBarry Smith    for (i=i_row; i<nrows; i++)
96b2f1ab58SBarry Smith    {
97b2f1ab58SBarry Smith       if (result->row_nnz[i]==0)
98b2f1ab58SBarry Smith       {
99b2f1ab58SBarry Smith          max_need_extra  += max_row_nnz[i];
100b2f1ab58SBarry Smith       }
101b2f1ab58SBarry Smith       else
102b2f1ab58SBarry Smith       {
103b2f1ab58SBarry Smith          need_already    += max_row_nnz[i];
104b2f1ab58SBarry Smith          n_rows_ahead++;
105b2f1ab58SBarry Smith       }
106b2f1ab58SBarry Smith    }
107b2f1ab58SBarry Smith 
108c328eeadSBarry Smith    /* Make maximal and realistic memory requirement estimates */
109b2f1ab58SBarry Smith    n_alloc_max = n_alloc_ok + need_already + max_need_extra;
110d36aa34eSBarry Smith    n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *
111d36aa34eSBarry Smith 						    ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
112b2f1ab58SBarry Smith 
113c328eeadSBarry Smith    /* Choose array sizes */
114b2f1ab58SBarry Smith    if (n_alloc_max == n_alloc_est)
115b2f1ab58SBarry Smith       { n_alloc = n_alloc_max; }
116b2f1ab58SBarry Smith    else if (n_alloc_now >= n_alloc_est)
117b2f1ab58SBarry Smith       { n_alloc = n_alloc_now; }
118b2f1ab58SBarry Smith    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))
119b2f1ab58SBarry Smith       { n_alloc  = n_alloc_max;  }
120b2f1ab58SBarry Smith    else
121b2f1ab58SBarry Smith       { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }
122b2f1ab58SBarry Smith 
123c328eeadSBarry Smith    /* If new estimate is less than what we already have,
124c328eeadSBarry Smith       don't reallocate, just garbage-collect */
125b2f1ab58SBarry Smith    if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol)
126b2f1ab58SBarry Smith    {
127b2f1ab58SBarry Smith       n_alloc = result->n_alloc_icol;
128b2f1ab58SBarry Smith    }
129b2f1ab58SBarry Smith 
130c328eeadSBarry Smith    /* Motivate dimension choice */
131c328eeadSBarry Smith    ierr = PetscInfo1(PETSC_NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
132b2f1ab58SBarry Smith    if (n_alloc_max == n_alloc_est)
133c328eeadSBarry Smith       { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); }
134b2f1ab58SBarry Smith    else if (n_alloc_now >= n_alloc_est)
135c328eeadSBarry Smith       { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); }
136b2f1ab58SBarry Smith    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))
137c328eeadSBarry Smith       { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); }
138b2f1ab58SBarry Smith    else
139c328eeadSBarry Smith       { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); }
140b2f1ab58SBarry Smith 
141b2f1ab58SBarry Smith 
142c328eeadSBarry Smith    /**********************************************************
143c328eeadSBarry Smith     2. Rescue arrays which would be lost
144c328eeadSBarry Smith     Count how many rows and nonzeros will have to be rescued
145c328eeadSBarry Smith     when moving all arrays in place */
146b2f1ab58SBarry Smith    n_row_rescue = 0; n_rescue = 0;
147b2f1ab58SBarry Smith    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
148b2f1ab58SBarry Smith    else
149b2f1ab58SBarry Smith    {
150b2f1ab58SBarry Smith       i = *n_row_alloc_ok - 1;
151b2f1ab58SBarry Smith       *n_alloc_used = (result->icols[i]-result->alloc_icol) +
152b2f1ab58SBarry Smith                        result->row_nnz[i];
153b2f1ab58SBarry Smith    }
154b2f1ab58SBarry Smith 
155b2f1ab58SBarry Smith    for (i=*n_row_alloc_ok; i<nrows; i++)
156b2f1ab58SBarry Smith    {
157b2f1ab58SBarry Smith       i_here = result->icols[i]-result->alloc_icol;
158b2f1ab58SBarry Smith       i_last = i_here + result->row_nnz[i];
159b2f1ab58SBarry Smith       if (result->row_nnz[i]>0)
160b2f1ab58SBarry Smith       {
161b2f1ab58SBarry Smith          if (*n_alloc_used > i_here || i_last > n_alloc)
162b2f1ab58SBarry Smith          {
163b2f1ab58SBarry Smith             n_rescue += result->row_nnz[i];
164b2f1ab58SBarry Smith             n_row_rescue++;
165b2f1ab58SBarry Smith          }
166b2f1ab58SBarry Smith 
167b2f1ab58SBarry Smith          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
168b2f1ab58SBarry Smith          else         {  *n_alloc_used += max_row_nnz[i];}
169b2f1ab58SBarry Smith       }
170b2f1ab58SBarry Smith    }
171b2f1ab58SBarry Smith 
172c328eeadSBarry Smith    /* Allocate rescue arrays */
173b2f1ab58SBarry Smith    ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue);
174b2f1ab58SBarry Smith    CHKERRQ(ierr);
175b2f1ab58SBarry Smith    ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue);
176b2f1ab58SBarry Smith    CHKERRQ(ierr);
177b2f1ab58SBarry Smith 
178c328eeadSBarry Smith    /* Rescue the arrays which need rescuing */
179b2f1ab58SBarry Smith    n_row_rescue = 0; n_rescue = 0;
180b2f1ab58SBarry Smith    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
181b2f1ab58SBarry Smith    else
182b2f1ab58SBarry Smith    {
183b2f1ab58SBarry Smith       i = *n_row_alloc_ok - 1;
184b2f1ab58SBarry Smith       *n_alloc_used = (result->icols[i]-result->alloc_icol) +
185b2f1ab58SBarry Smith                        result->row_nnz[i];
186b2f1ab58SBarry Smith    }
187b2f1ab58SBarry Smith 
188b2f1ab58SBarry Smith    for (i=*n_row_alloc_ok; i<nrows; i++)
189b2f1ab58SBarry Smith    {
190b2f1ab58SBarry Smith       i_here = result->icols[i]-result->alloc_icol;
191b2f1ab58SBarry Smith       i_last = i_here + result->row_nnz[i];
192b2f1ab58SBarry Smith       if (result->row_nnz[i]>0)
193b2f1ab58SBarry Smith       {
194b2f1ab58SBarry Smith          if (*n_alloc_used > i_here || i_last > n_alloc)
195b2f1ab58SBarry Smith          {
196b2f1ab58SBarry Smith             memcpy((void*) &icol_rescue[n_rescue],
197b2f1ab58SBarry Smith                    (void*) result->icols[i],
198b2f1ab58SBarry Smith                     result->row_nnz[i] * sizeof(int));
199b2f1ab58SBarry Smith             memcpy((void*) &val_rescue[n_rescue],
200b2f1ab58SBarry Smith                    (void*) result->values[i],
201b2f1ab58SBarry Smith                    result->row_nnz[i] * sizeof(PetscScalar));
202b2f1ab58SBarry Smith             n_rescue += result->row_nnz[i];
203b2f1ab58SBarry Smith             n_row_rescue++;
204b2f1ab58SBarry Smith          }
205b2f1ab58SBarry Smith 
206b2f1ab58SBarry Smith          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
207b2f1ab58SBarry Smith          else         {  *n_alloc_used += max_row_nnz[i];}
208b2f1ab58SBarry Smith       }
209b2f1ab58SBarry Smith    }
210b2f1ab58SBarry Smith 
211c328eeadSBarry Smith    /**********************************************************
212c328eeadSBarry Smith     3. Reallocate and correct administration */
213b2f1ab58SBarry Smith 
214b2f1ab58SBarry Smith    if (n_alloc != result->n_alloc_icol)
215b2f1ab58SBarry Smith    {
216b2f1ab58SBarry Smith       n_copy = MIN(n_alloc,result->n_alloc_icol);
217b2f1ab58SBarry Smith 
218c328eeadSBarry Smith       /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
219b2f1ab58SBarry Smith 
220c328eeadSBarry Smith 	 Allocate new icol-data, copy old contents */
221b2f1ab58SBarry Smith       ierr = PetscMalloc(  n_alloc * sizeof(int),
222b2f1ab58SBarry Smith                            &result->alloc_icol); CHKERRQ(ierr);
223b2f1ab58SBarry Smith       memcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));
224b2f1ab58SBarry Smith 
225c328eeadSBarry Smith       /* Update administration, Reset pointers to new arrays  */
226b2f1ab58SBarry Smith       result->n_alloc_icol = n_alloc;
227b2f1ab58SBarry Smith       for (i=0; i<nrows; i++)
228b2f1ab58SBarry Smith       {
229b2f1ab58SBarry Smith           result->icols[i] =  result->alloc_icol +
230b2f1ab58SBarry Smith                   (result->icols[i]  - alloc_icol_old);
231b2f1ab58SBarry Smith           result->values[i] =  result->alloc_val +
232b2f1ab58SBarry Smith                   (result->icols[i]  - result->alloc_icol);
233b2f1ab58SBarry Smith       }
234b2f1ab58SBarry Smith 
235c328eeadSBarry Smith       /* Delete old array */
236b2f1ab58SBarry Smith       ierr = PetscFree(alloc_icol_old);  CHKERRQ(ierr);
237b2f1ab58SBarry Smith 
238c328eeadSBarry Smith       /* Allocate new value-data, copy old contents */
239b2f1ab58SBarry Smith       ierr = PetscMalloc(  n_alloc * sizeof(PetscScalar),
240b2f1ab58SBarry Smith                            &result->alloc_val); CHKERRQ(ierr);
241b2f1ab58SBarry Smith       memcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));
242b2f1ab58SBarry Smith 
243c328eeadSBarry Smith       /* Update administration, Reset pointers to new arrays  */
244b2f1ab58SBarry Smith       result->n_alloc_val  = n_alloc;
245b2f1ab58SBarry Smith       for (i=0; i<nrows; i++)
246b2f1ab58SBarry Smith       {
247b2f1ab58SBarry Smith           result->values[i] =  result->alloc_val +
248b2f1ab58SBarry Smith                   (result->icols[i]  - result->alloc_icol);
249b2f1ab58SBarry Smith       }
250b2f1ab58SBarry Smith 
251c328eeadSBarry Smith       /* Delete old array */
252b2f1ab58SBarry Smith       ierr = PetscFree(alloc_val_old);  CHKERRQ(ierr);
253b2f1ab58SBarry Smith    }
254b2f1ab58SBarry Smith 
255b2f1ab58SBarry Smith 
256c328eeadSBarry Smith    /*********************************************************
257c328eeadSBarry Smith     4. Copy all the arrays to their proper places */
258b2f1ab58SBarry Smith    n_row_rescue = 0; n_rescue = 0;
259b2f1ab58SBarry Smith    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
260b2f1ab58SBarry Smith    else
261b2f1ab58SBarry Smith    {
262b2f1ab58SBarry Smith       i = *n_row_alloc_ok - 1;
263b2f1ab58SBarry Smith       *n_alloc_used = (result->icols[i]-result->alloc_icol) +
264b2f1ab58SBarry Smith                        result->row_nnz[i];
265b2f1ab58SBarry Smith    }
266b2f1ab58SBarry Smith 
267b2f1ab58SBarry Smith    for (i=*n_row_alloc_ok; i<nrows; i++)
268b2f1ab58SBarry Smith    {
269b2f1ab58SBarry Smith       i_here = result->icols[i]-result->alloc_icol;
270b2f1ab58SBarry Smith       i_last = i_here + result->row_nnz[i];
271b2f1ab58SBarry Smith 
272b2f1ab58SBarry Smith       result->icols[i] = result->alloc_icol + *n_alloc_used;
273b2f1ab58SBarry Smith       result->values[i]= result->alloc_val  + *n_alloc_used;
274b2f1ab58SBarry Smith 
275b2f1ab58SBarry Smith       if (result->row_nnz[i]>0)
276b2f1ab58SBarry Smith       {
277b2f1ab58SBarry Smith          if (*n_alloc_used > i_here || i_last > n_alloc)
278b2f1ab58SBarry Smith          {
279b2f1ab58SBarry Smith             memcpy((void*) result->icols[i],
280b2f1ab58SBarry Smith                    (void*) &icol_rescue[n_rescue],
281b2f1ab58SBarry Smith                     result->row_nnz[i] * sizeof(int));
282b2f1ab58SBarry Smith             memcpy((void*) result->values[i],
283b2f1ab58SBarry Smith                    (void*) &val_rescue[n_rescue],
284b2f1ab58SBarry Smith                     result->row_nnz[i] * sizeof(PetscScalar));
285b2f1ab58SBarry Smith             n_rescue += result->row_nnz[i];
286b2f1ab58SBarry Smith             n_row_rescue++;
287b2f1ab58SBarry Smith          }
288b2f1ab58SBarry Smith          else
289b2f1ab58SBarry Smith          {
290b2f1ab58SBarry Smith             for (j=0; j<result->row_nnz[i]; j++)
291b2f1ab58SBarry Smith             {
292b2f1ab58SBarry Smith                 result->icols[i][j]   = result->alloc_icol[i_here+j];
293b2f1ab58SBarry Smith                 result->values[i][j]  = result->alloc_val[ i_here+j];
294b2f1ab58SBarry Smith             }
295b2f1ab58SBarry Smith          }
296b2f1ab58SBarry Smith          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
297b2f1ab58SBarry Smith          else         {  *n_alloc_used += max_row_nnz[i];}
298b2f1ab58SBarry Smith       }
299b2f1ab58SBarry Smith    }
300b2f1ab58SBarry Smith 
301c328eeadSBarry Smith    /* Delete the rescue arrays */
302b2f1ab58SBarry Smith    ierr = PetscFree(icol_rescue); CHKERRQ(ierr);
303b2f1ab58SBarry Smith    ierr = PetscFree(val_rescue);  CHKERRQ(ierr);
304b2f1ab58SBarry Smith    *n_row_alloc_ok = i_row;
305b2f1ab58SBarry Smith 
306b2f1ab58SBarry Smith    PetscFunctionReturn(0);
307b2f1ab58SBarry Smith 
308b2f1ab58SBarry Smith }
309b2f1ab58SBarry Smith 
310b2f1ab58SBarry Smith /*
311b2f1ab58SBarry Smith   spbas_incomplete_cholesky:
312b2f1ab58SBarry Smith      incomplete Cholesky decomposition of a square, symmetric,
313b2f1ab58SBarry Smith      positive definite matrix.
314b2f1ab58SBarry Smith 
315b2f1ab58SBarry Smith      In case negative diagonals are encountered, function returns
316b2f1ab58SBarry Smith      NEGATIVE_DIAGONAL. When this happens, call this function again
317b2f1ab58SBarry Smith      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
318b2f1ab58SBarry Smith      droptol
319b2f1ab58SBarry Smith */
320b2f1ab58SBarry Smith #undef __FUNCT__
321b2f1ab58SBarry Smith #define __FUNCT__ "spbas_incomplete_cholesky"
322b2f1ab58SBarry Smith PetscErrorCode spbas_incomplete_cholesky(
323b2f1ab58SBarry Smith         Mat A, const PetscInt *rip, const PetscInt *riip,
324b2f1ab58SBarry Smith         spbas_matrix pattern,
325d36aa34eSBarry Smith         PetscReal droptol, PetscReal epsdiag_in,
326b2f1ab58SBarry Smith         spbas_matrix * matrix_L)
327b2f1ab58SBarry Smith {
328b2f1ab58SBarry Smith    PetscInt jL;
329b2f1ab58SBarry Smith    Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
330b2f1ab58SBarry Smith    PetscInt       *ai=a->i,*aj=a->j;
331b2f1ab58SBarry Smith    MatScalar      *aa=a->a;
332b2f1ab58SBarry Smith    PetscInt nrows, ncols;
333b2f1ab58SBarry Smith    PetscInt *max_row_nnz;
334b2f1ab58SBarry Smith    PetscErrorCode ierr;
335b2f1ab58SBarry Smith    spbas_matrix retval;
336b2f1ab58SBarry Smith    PetscScalar * diag;
337b2f1ab58SBarry Smith    PetscScalar * val;
338b2f1ab58SBarry Smith    PetscScalar * lvec;
339b2f1ab58SBarry Smith    PetscScalar epsdiag;
340b2f1ab58SBarry Smith    PetscInt i,j,k;
341b2f1ab58SBarry Smith    const PetscTruth do_values = PETSC_TRUE;
342b2f1ab58SBarry Smith    PetscInt * r1_icol; PetscScalar *r1_val;
343b2f1ab58SBarry Smith    PetscInt * r_icol; PetscInt r_nnz; PetscScalar *r_val;
344b2f1ab58SBarry Smith    PetscInt * A_icol; PetscInt A_nnz; PetscScalar *A_val;
345b2f1ab58SBarry Smith    PetscInt * p_icol; PetscInt p_nnz;
346c328eeadSBarry Smith    PetscInt n_row_alloc_ok = 0;  /* number of rows which have been stored
347c328eeadSBarry Smith 				    correctly in the matrix */
348c328eeadSBarry Smith    PetscInt n_alloc_used = 0;    /* part of result->icols and result->values
349c328eeadSBarry Smith 				    which is currently being used */
350b2f1ab58SBarry Smith    PetscFunctionBegin;
351b2f1ab58SBarry Smith 
352c328eeadSBarry Smith    /* Convert the Manteuffel shift from 'fraction of average diagonal' to
353c328eeadSBarry Smith       dimensioned value */
354b2f1ab58SBarry Smith    ierr = MatGetSize(A, &nrows, &ncols); CHKERRQ(ierr);
355b2f1ab58SBarry Smith    ierr = MatGetTrace(A, &epsdiag); CHKERRQ(ierr);
356b2f1ab58SBarry Smith    epsdiag *= epsdiag_in / nrows;
3579767453cSBarry Smith    ierr = PetscInfo2(PETSC_NULL,"   Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr);
358b2f1ab58SBarry Smith 
359b2f1ab58SBarry Smith    if (droptol<1e-10) {droptol=1e-10;}
360b2f1ab58SBarry Smith 
361b2f1ab58SBarry Smith    if ( (nrows != pattern.nrows) ||
362b2f1ab58SBarry Smith         (ncols != pattern.ncols) ||
363b2f1ab58SBarry Smith         (ncols != nrows) )
364b2f1ab58SBarry Smith    {
365b2f1ab58SBarry Smith       SETERRQ( PETSC_ERR_ARG_INCOMP,
366b2f1ab58SBarry Smith                "Dimension error in spbas_incomplete_cholesky\n");
367b2f1ab58SBarry Smith    }
368b2f1ab58SBarry Smith 
369b2f1ab58SBarry Smith    retval.nrows = nrows;
370b2f1ab58SBarry Smith    retval.ncols = nrows;
371b2f1ab58SBarry Smith    retval.nnz   = pattern.nnz/10;
372b2f1ab58SBarry Smith    retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
373b2f1ab58SBarry Smith    retval.block_data = PETSC_TRUE;
374b2f1ab58SBarry Smith 
375b2f1ab58SBarry Smith    ierr =  spbas_allocate_pattern(&retval, do_values); CHKERRQ(ierr);
376b2f1ab58SBarry Smith    memset((void*) retval.row_nnz, 0, nrows*sizeof(int));
377b2f1ab58SBarry Smith    ierr =  spbas_allocate_data(&retval); CHKERRQ(ierr);
378b2f1ab58SBarry Smith    retval.nnz   = 0;
379b2f1ab58SBarry Smith 
380b2f1ab58SBarry Smith    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag); CHKERRQ(ierr);
381b2f1ab58SBarry Smith    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);  CHKERRQ(ierr);
382b2f1ab58SBarry Smith    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec); CHKERRQ(ierr);
383b2f1ab58SBarry Smith    ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);  CHKERRQ(ierr);
384b2f1ab58SBarry Smith    if (!(diag && val && lvec && max_row_nnz))
385b2f1ab58SBarry Smith    {
386b2f1ab58SBarry Smith       SETERRQ( PETSC_ERR_MEM,
387b2f1ab58SBarry Smith               "Allocation error in spbas_incomplete_cholesky\n");
388b2f1ab58SBarry Smith    }
389b2f1ab58SBarry Smith 
390b2f1ab58SBarry Smith    memset((void*) val, 0, nrows*sizeof(PetscScalar));
391b2f1ab58SBarry Smith    memset((void*) lvec, 0, nrows*sizeof(PetscScalar));
392b2f1ab58SBarry Smith    memset((void*) max_row_nnz, 0, nrows*sizeof(int));
393b2f1ab58SBarry Smith 
394c328eeadSBarry Smith    /* Check correct format of sparseness pattern */
395b2f1ab58SBarry Smith    if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS)
396b2f1ab58SBarry Smith    {
397b2f1ab58SBarry Smith       SETERRQ( PETSC_ERR_SUP_SYS,
398b2f1ab58SBarry Smith                "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
399b2f1ab58SBarry Smith    }
400b2f1ab58SBarry Smith 
401c328eeadSBarry Smith    /* Count the nonzeros on transpose of pattern */
402b2f1ab58SBarry Smith    for (i = 0; i<nrows; i++)
403b2f1ab58SBarry Smith    {
404b2f1ab58SBarry Smith       p_nnz  = pattern.row_nnz[i];
405b2f1ab58SBarry Smith       p_icol = pattern.icols[i];
406b2f1ab58SBarry Smith       for (j=0; j<p_nnz; j++)
407b2f1ab58SBarry Smith       {
408b2f1ab58SBarry Smith          max_row_nnz[i+p_icol[j]]++;
409b2f1ab58SBarry Smith       }
410b2f1ab58SBarry Smith    }
411b2f1ab58SBarry Smith 
412c328eeadSBarry Smith    /* Calculate rows of L */
413b2f1ab58SBarry Smith    for (i = 0; i<nrows; i++)
414b2f1ab58SBarry Smith    {
415b2f1ab58SBarry Smith       p_nnz  = pattern.row_nnz[i];
416b2f1ab58SBarry Smith       p_icol = pattern.icols[i];
417b2f1ab58SBarry Smith 
418b2f1ab58SBarry Smith       r_nnz  = retval.row_nnz[i];
419b2f1ab58SBarry Smith       r_icol = retval.icols[i];
420b2f1ab58SBarry Smith       r_val  = retval.values[i];
421b2f1ab58SBarry Smith       A_nnz  = ai[rip[i]+1] - ai[rip[i]];
422b2f1ab58SBarry Smith       A_icol = &aj[ai[rip[i]]];
423b2f1ab58SBarry Smith       A_val  = &aa[ai[rip[i]]];
424b2f1ab58SBarry Smith 
425c328eeadSBarry Smith       /* Calculate  val += A(i,i:n)'; */
426b2f1ab58SBarry Smith       for (j=0; j<A_nnz; j++)
427b2f1ab58SBarry Smith       {
428b2f1ab58SBarry Smith          k = riip[A_icol[j]];
429b2f1ab58SBarry Smith          if (k>=i) { val[k] = A_val[j]; }
430b2f1ab58SBarry Smith       }
431b2f1ab58SBarry Smith 
432c328eeadSBarry Smith       /*  Add regularization */
433b2f1ab58SBarry Smith       val[i] += epsdiag;
434b2f1ab58SBarry Smith 
435c328eeadSBarry Smith       /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
436c328eeadSBarry Smith 	 val(i) = A(i,i) - L(0:i-1,i)' * lvec */
437b2f1ab58SBarry Smith       for ( j=0; j<r_nnz; j++)
438b2f1ab58SBarry Smith       {
439b2f1ab58SBarry Smith          k           = r_icol[j];
440b2f1ab58SBarry Smith          lvec[k]     = diag[k] * r_val[j];
441b2f1ab58SBarry Smith          val[i]     -= r_val[j] * lvec[k];
442b2f1ab58SBarry Smith       }
443b2f1ab58SBarry Smith 
444c328eeadSBarry Smith       /* Calculate the new diagonal */
445b2f1ab58SBarry Smith       diag[i] = val[i];
4469767453cSBarry Smith       if (PetscRealPart(diag[i])<droptol)
447b2f1ab58SBarry Smith       {
448c328eeadSBarry Smith          ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n");
449c328eeadSBarry Smith          ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1);
450b2f1ab58SBarry Smith 
451c328eeadSBarry Smith          /* Delete the whole matrix at once. */
452b2f1ab58SBarry Smith          ierr = spbas_delete(retval);
453b2f1ab58SBarry Smith          return NEGATIVE_DIAGONAL;
454b2f1ab58SBarry Smith       }
455b2f1ab58SBarry Smith 
456c328eeadSBarry Smith       /* If necessary, allocate arrays */
457b2f1ab58SBarry Smith       if (r_nnz==0)
458b2f1ab58SBarry Smith       {
459b2f1ab58SBarry Smith          ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
460b2f1ab58SBarry Smith          if (ierr == PETSC_ERR_MEM)
461b2f1ab58SBarry Smith          {
462b2f1ab58SBarry Smith             ierr = spbas_cholesky_garbage_collect( &retval,  i, &n_row_alloc_ok,
463b2f1ab58SBarry Smith                                &n_alloc_used, max_row_nnz);
464b2f1ab58SBarry Smith             CHKERRQ(ierr);
465b2f1ab58SBarry Smith             ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
466b2f1ab58SBarry Smith          }
467b2f1ab58SBarry Smith          CHKERRQ(ierr);
468b2f1ab58SBarry Smith          r_icol = retval.icols[i];
469b2f1ab58SBarry Smith          r_val  = retval.values[i];
470b2f1ab58SBarry Smith       }
471b2f1ab58SBarry Smith 
472c328eeadSBarry Smith       /* Now, fill in */
473b2f1ab58SBarry Smith       r_icol[r_nnz] = i;
474b2f1ab58SBarry Smith       r_val [r_nnz] = 1.0;
475b2f1ab58SBarry Smith       r_nnz++;
476b2f1ab58SBarry Smith       retval.row_nnz[i]++;
477b2f1ab58SBarry Smith 
478b2f1ab58SBarry Smith       retval.nnz += r_nnz;
479b2f1ab58SBarry Smith 
480c328eeadSBarry Smith       /* Calculate
481c328eeadSBarry Smith          val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
482b2f1ab58SBarry Smith       for (j=1; j<p_nnz; j++)
483b2f1ab58SBarry Smith       {
484b2f1ab58SBarry Smith          k = i+p_icol[j];
485b2f1ab58SBarry Smith          r1_icol = retval.icols[k];
486b2f1ab58SBarry Smith          r1_val  = retval.values[k];
487b2f1ab58SBarry Smith          for (jL=0; jL<retval.row_nnz[k]; jL++)
488b2f1ab58SBarry Smith          {
489b2f1ab58SBarry Smith             val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
490b2f1ab58SBarry Smith          }
491b2f1ab58SBarry Smith          val[k] /= diag[i];
492b2f1ab58SBarry Smith 
493d36aa34eSBarry Smith          if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol)
494b2f1ab58SBarry Smith          {
495c328eeadSBarry Smith 	   /* If necessary, allocate arrays */
496b2f1ab58SBarry Smith             if (retval.row_nnz[k]==0)
497b2f1ab58SBarry Smith             {
498b2f1ab58SBarry Smith                ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k],
499b2f1ab58SBarry Smith                             &n_alloc_used);
500b2f1ab58SBarry Smith                if (ierr == PETSC_ERR_MEM)
501b2f1ab58SBarry Smith                {
502b2f1ab58SBarry Smith                   ierr = spbas_cholesky_garbage_collect(
503b2f1ab58SBarry Smith                                      &retval,  i, &n_row_alloc_ok,
504b2f1ab58SBarry Smith                                      &n_alloc_used, max_row_nnz);
505b2f1ab58SBarry Smith                   CHKERRQ(ierr);
506b2f1ab58SBarry Smith                   ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k],
507b2f1ab58SBarry Smith                             &n_alloc_used);
508b2f1ab58SBarry Smith                   r_icol = retval.icols[i];
509b2f1ab58SBarry Smith                   r_val  = retval.values[i];
510b2f1ab58SBarry Smith                }
511b2f1ab58SBarry Smith                CHKERRQ(ierr);
512b2f1ab58SBarry Smith             }
513b2f1ab58SBarry Smith 
514b2f1ab58SBarry Smith             retval.icols[k][retval.row_nnz[k]] = i;
515b2f1ab58SBarry Smith             retval.values[k][retval.row_nnz[k]] = val[k];
516b2f1ab58SBarry Smith             retval.row_nnz[k]++;
517b2f1ab58SBarry Smith          }
518b2f1ab58SBarry Smith          val[k] = 0;
519b2f1ab58SBarry Smith       }
520b2f1ab58SBarry Smith 
521*71d55d18SBarry Smith       /*Erase the values used in the work arrays */
522b2f1ab58SBarry Smith       for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
523b2f1ab58SBarry Smith    }
524b2f1ab58SBarry Smith 
525b2f1ab58SBarry Smith    ierr=PetscFree(lvec); CHKERRQ(ierr); ierr=PetscFree(val); CHKERRQ(ierr);
526b2f1ab58SBarry Smith 
527b2f1ab58SBarry Smith    ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok,
528b2f1ab58SBarry Smith                                      &n_alloc_used, max_row_nnz);
529b2f1ab58SBarry Smith    ierr=PetscFree(max_row_nnz); CHKERRQ(ierr);
530b2f1ab58SBarry Smith 
531c328eeadSBarry Smith    /* Place the inverse of the diagonals in the matrix */
532b2f1ab58SBarry Smith    for (i=0; i<nrows; i++)
533b2f1ab58SBarry Smith    {
534b2f1ab58SBarry Smith       r_nnz = retval.row_nnz[i];
535b2f1ab58SBarry Smith       retval.values[i][r_nnz-1] = 1.0 / diag[i];
536b2f1ab58SBarry Smith       for (j=0; j<r_nnz-1; j++)
537b2f1ab58SBarry Smith       {
538b2f1ab58SBarry Smith          retval.values[i][j] *= -1;
539b2f1ab58SBarry Smith       }
540b2f1ab58SBarry Smith    }
541b2f1ab58SBarry Smith    ierr=PetscFree(diag); CHKERRQ(ierr);
542b2f1ab58SBarry Smith 
543b2f1ab58SBarry Smith 
544b2f1ab58SBarry Smith    *matrix_L = retval;
545b2f1ab58SBarry Smith 
546b2f1ab58SBarry Smith    PetscFunctionReturn(0);
547b2f1ab58SBarry Smith }
548b2f1ab58SBarry Smith 
549