xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision d36aa34e572813df9f6a7ac388db98c2655a68a6)
1b2f1ab58SBarry Smith 
2b2f1ab58SBarry Smith /*
3b2f1ab58SBarry Smith    spbas_cholesky_row_alloc:
4b2f1ab58SBarry Smith       in the data arrays, find a place where another row may be stored.
5b2f1ab58SBarry Smith       Return PETSC_ERR_MEM if there is insufficient space to store the
6b2f1ab58SBarry Smith       row, so garbage collection and/or re-allocation may be done.
7b2f1ab58SBarry Smith */
8b2f1ab58SBarry Smith #undef __FUNCT__
9b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_row_alloc"
10b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11b2f1ab58SBarry Smith {
12b2f1ab58SBarry Smith    PetscFunctionBegin;
13b2f1ab58SBarry Smith    retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
14b2f1ab58SBarry Smith    retval.values[k] = &retval.alloc_val[*n_alloc_used];
15b2f1ab58SBarry Smith    *n_alloc_used   += r_nnz;
16b2f1ab58SBarry Smith    if (*n_alloc_used > retval.n_alloc_icol)
17b2f1ab58SBarry Smith    {
18b2f1ab58SBarry Smith       PetscFunctionReturn(PETSC_ERR_MEM);
19b2f1ab58SBarry Smith    }
20b2f1ab58SBarry Smith    else
21b2f1ab58SBarry Smith    {
22b2f1ab58SBarry Smith       PetscFunctionReturn(0);
23b2f1ab58SBarry Smith    }
24b2f1ab58SBarry Smith }
25b2f1ab58SBarry Smith 
26b2f1ab58SBarry Smith 
27b2f1ab58SBarry Smith /*
28b2f1ab58SBarry Smith   spbas_cholesky_garbage_collect:
29b2f1ab58SBarry Smith      move the rows which have been calculated so far, as well as
30b2f1ab58SBarry Smith      those currently under construction, to the front of the
31b2f1ab58SBarry Smith      array, while putting them in the proper order.
32b2f1ab58SBarry Smith      When it seems necessary, enlarge the current arrays.
33b2f1ab58SBarry Smith 
34b2f1ab58SBarry Smith      NB: re-allocation is being simulated using
35b2f1ab58SBarry Smith          PetscMalloc, memcpy, PetscFree, because
36b2f1ab58SBarry Smith          PetscRealloc does not seem to exist.
37b2f1ab58SBarry Smith 
38b2f1ab58SBarry Smith */
39b2f1ab58SBarry Smith #undef __FUNCT__
40b2f1ab58SBarry Smith #define __FUNCT__ "spbas_cholesky_garbage_collect"
41b2f1ab58SBarry Smith PetscErrorCode spbas_cholesky_garbage_collect(
42b2f1ab58SBarry Smith       spbas_matrix * result, // I/O: the Cholesky factor matrix being constructed.
43b2f1ab58SBarry Smith                              //      Only the storage, not the contents of this matrix
44b2f1ab58SBarry Smith                              //      is changed in this function
45b2f1ab58SBarry Smith       PetscInt i_row,             // I  : Number of rows for which the final contents are
46b2f1ab58SBarry Smith                              //      known
47b2f1ab58SBarry Smith       PetscInt *n_row_alloc_ok,   // I/O: Number of rows which are already in their final
48b2f1ab58SBarry Smith                              //      places in the arrays: they need not be moved any more
49b2f1ab58SBarry Smith       PetscInt *n_alloc_used,     // I/O:
50b2f1ab58SBarry Smith       PetscInt *max_row_nnz       // I  : Over-estimate of the number of nonzeros needed to
51b2f1ab58SBarry Smith                              //      store each row
52b2f1ab58SBarry Smith      )
53b2f1ab58SBarry Smith {
54b2f1ab58SBarry Smith 
55b2f1ab58SBarry Smith #undef garbage_debug
56b2f1ab58SBarry Smith 
57b2f1ab58SBarry Smith    // PSEUDO-CODE:
58b2f1ab58SBarry Smith    //  1. Choose the appropriate size for the arrays
59b2f1ab58SBarry Smith    //  2. Rescue the arrays which would be lost during garbage collection
60b2f1ab58SBarry Smith    //  3. Reallocate and correct administration
61b2f1ab58SBarry Smith    //  4. Move all arrays so that they are in proper order
62b2f1ab58SBarry Smith 
63b2f1ab58SBarry Smith    PetscInt i,j;
64b2f1ab58SBarry Smith    PetscInt nrows = result->nrows;
65b2f1ab58SBarry Smith    PetscInt n_alloc_ok=0;
66b2f1ab58SBarry Smith    PetscInt n_alloc_ok_max=0;
67b2f1ab58SBarry Smith 
68b2f1ab58SBarry Smith    PetscErrorCode ierr;
69b2f1ab58SBarry Smith    PetscInt need_already= 0;
70b2f1ab58SBarry Smith    PetscInt n_rows_ahead=0;
71b2f1ab58SBarry Smith    PetscInt max_need_extra= 0;
72b2f1ab58SBarry Smith    PetscInt n_alloc_max, n_alloc_est, n_alloc;
73b2f1ab58SBarry Smith    PetscInt n_alloc_now = result->n_alloc_icol;
74b2f1ab58SBarry Smith    PetscInt *alloc_icol_old = result->alloc_icol;
75b2f1ab58SBarry Smith    PetscScalar *alloc_val_old  = result->alloc_val;
76b2f1ab58SBarry Smith    PetscInt *icol_rescue;
77b2f1ab58SBarry Smith    PetscScalar *val_rescue;
78b2f1ab58SBarry Smith    PetscInt n_rescue;
79b2f1ab58SBarry Smith    PetscInt n_row_rescue;
80b2f1ab58SBarry Smith    PetscInt i_here, i_last, n_copy;
81*d36aa34eSBarry Smith    const PetscReal xtra_perc = 20;
82b2f1ab58SBarry Smith 
83b2f1ab58SBarry Smith    PetscFunctionBegin;
84b2f1ab58SBarry Smith 
85b2f1ab58SBarry Smith    //*********************************************************
86b2f1ab58SBarry Smith    // 1. Choose appropriate array size
87b2f1ab58SBarry Smith    // Count number of rows and memory usage which is already final
88b2f1ab58SBarry Smith    for (i=0;i<i_row; i++)
89b2f1ab58SBarry Smith    {
90b2f1ab58SBarry Smith       n_alloc_ok += result->row_nnz[i];
91b2f1ab58SBarry Smith       n_alloc_ok_max += max_row_nnz[i];
92b2f1ab58SBarry Smith    }
93b2f1ab58SBarry Smith 
94b2f1ab58SBarry Smith    // Count currently needed memory usage and future memory requirements
95b2f1ab58SBarry Smith    // (max, predicted)
96b2f1ab58SBarry Smith    for (i=i_row; i<nrows; i++)
97b2f1ab58SBarry Smith    {
98b2f1ab58SBarry Smith       if (result->row_nnz[i]==0)
99b2f1ab58SBarry Smith       {
100b2f1ab58SBarry Smith          max_need_extra  += max_row_nnz[i];
101b2f1ab58SBarry Smith       }
102b2f1ab58SBarry Smith       else
103b2f1ab58SBarry Smith       {
104b2f1ab58SBarry Smith          need_already    += max_row_nnz[i];
105b2f1ab58SBarry Smith          n_rows_ahead++;
106b2f1ab58SBarry Smith       }
107b2f1ab58SBarry Smith    }
108b2f1ab58SBarry Smith 
109b2f1ab58SBarry Smith    // Make maximal and realistic memory requirement estimates
110b2f1ab58SBarry Smith    n_alloc_max = n_alloc_ok + need_already + max_need_extra;
111*d36aa34eSBarry Smith    n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *
112*d36aa34eSBarry Smith 						    ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
113b2f1ab58SBarry Smith 
114b2f1ab58SBarry Smith    // Choose array sizes
115b2f1ab58SBarry Smith    if (n_alloc_max == n_alloc_est)
116b2f1ab58SBarry Smith       { n_alloc = n_alloc_max; }
117b2f1ab58SBarry Smith    else if (n_alloc_now >= n_alloc_est)
118b2f1ab58SBarry Smith       { n_alloc = n_alloc_now; }
119b2f1ab58SBarry Smith    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))
120b2f1ab58SBarry Smith       { n_alloc  = n_alloc_max;  }
121b2f1ab58SBarry Smith    else
122b2f1ab58SBarry Smith       { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }
123b2f1ab58SBarry Smith 
124b2f1ab58SBarry Smith    // If new estimate is less than what we already have,
125b2f1ab58SBarry Smith    //   don't reallocate, just garbage-collect
126b2f1ab58SBarry Smith    if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol)
127b2f1ab58SBarry Smith    {
128b2f1ab58SBarry Smith       n_alloc = result->n_alloc_icol;
129b2f1ab58SBarry Smith    }
130b2f1ab58SBarry Smith 
131b2f1ab58SBarry Smith #ifdef garbage_debug
132b2f1ab58SBarry Smith    // Print allocation considerations
133b2f1ab58SBarry Smith    printf(" + Final version of rows 1..%d is available\n",i_row);
134b2f1ab58SBarry Smith    printf("   They have %d nonzeros: %6.2f per row\n",n_alloc_ok,
135b2f1ab58SBarry Smith                   (PetscScalar) n_alloc_ok / (PetscScalar)i_row);
136b2f1ab58SBarry Smith    printf("   Without droptol, I would have had %d nonzeros\n",
137b2f1ab58SBarry Smith               n_alloc_ok_max);
138b2f1ab58SBarry Smith    printf("   So I use %6.2f%% of the allowed sparseness\n",
139b2f1ab58SBarry Smith                   100.0 * (PetscScalar) n_alloc_ok /
140b2f1ab58SBarry Smith                           (PetscScalar) n_alloc_ok_max);
141b2f1ab58SBarry Smith    printf(" + I have %d rows for which I need full sparseness pattern\n",
142b2f1ab58SBarry Smith                   n_rows_ahead);
143b2f1ab58SBarry Smith    printf("   They require %d nonzeros\n",
144b2f1ab58SBarry Smith                need_already);
145b2f1ab58SBarry Smith    printf(" + After that, I shall need %d more rows\n",
146b2f1ab58SBarry Smith               nrows-n_rows_ahead-i_row);
147b2f1ab58SBarry Smith    printf("   Maximally, I will need %d nonzeros there\n",
148b2f1ab58SBarry Smith                        max_need_extra);
149b2f1ab58SBarry Smith    printf("   Realistically, I expect to need %6.2f nonzeros there\n",
150b2f1ab58SBarry Smith                (PetscScalar) max_need_extra *
151b2f1ab58SBarry Smith                           (PetscScalar) n_alloc_ok /
152b2f1ab58SBarry Smith                           (PetscScalar) n_alloc_ok_max);
153b2f1ab58SBarry Smith    printf(" + New allocated arrays should be %d (max) or %d (realistic) long\n",
154b2f1ab58SBarry Smith               n_alloc_max, n_alloc_est);
155b2f1ab58SBarry Smith 
156b2f1ab58SBarry Smith #endif
157b2f1ab58SBarry Smith    // Motivate dimension choice
158b2f1ab58SBarry Smith    printf("   Allocating %d nonzeros: ",n_alloc);
159b2f1ab58SBarry Smith    if (n_alloc_max == n_alloc_est)
160b2f1ab58SBarry Smith       { printf("this is the correct size\n"); }
161b2f1ab58SBarry Smith    else if (n_alloc_now >= n_alloc_est)
162b2f1ab58SBarry Smith       { printf("the current size, which seems enough\n"); }
163b2f1ab58SBarry Smith    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))
164b2f1ab58SBarry Smith       { printf("the maximum estimate\n"); }
165b2f1ab58SBarry Smith    else
166b2f1ab58SBarry Smith       { printf("%6.2f %% more than the estimate\n",xtra_perc); }
167b2f1ab58SBarry Smith 
168b2f1ab58SBarry Smith 
169b2f1ab58SBarry Smith    //*********************************************************
170b2f1ab58SBarry Smith    // 2. Rescue arrays which would be lost
171b2f1ab58SBarry Smith    // Count how many rows and nonzeros will have to be rescued
172b2f1ab58SBarry Smith    // when moving all arrays in place
173b2f1ab58SBarry Smith    n_row_rescue = 0; n_rescue = 0;
174b2f1ab58SBarry Smith    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
175b2f1ab58SBarry Smith    else
176b2f1ab58SBarry Smith    {
177b2f1ab58SBarry Smith       i = *n_row_alloc_ok - 1;
178b2f1ab58SBarry Smith       *n_alloc_used = (result->icols[i]-result->alloc_icol) +
179b2f1ab58SBarry Smith                        result->row_nnz[i];
180b2f1ab58SBarry Smith    }
181b2f1ab58SBarry Smith 
182b2f1ab58SBarry Smith #ifdef garbage_debug
183b2f1ab58SBarry Smith    printf("\n\n");
184b2f1ab58SBarry Smith    printf("Rows 0..%d are already OK\n", *n_row_alloc_ok);
185b2f1ab58SBarry Smith    printf("Rows 0..%d have the right dimensions\n", i_row-1);
186b2f1ab58SBarry Smith #endif
187b2f1ab58SBarry Smith 
188b2f1ab58SBarry Smith    for (i=*n_row_alloc_ok; i<nrows; i++)
189b2f1ab58SBarry Smith    {
190b2f1ab58SBarry Smith       i_here = result->icols[i]-result->alloc_icol;
191b2f1ab58SBarry Smith       i_last = i_here + result->row_nnz[i];
192b2f1ab58SBarry Smith       if (result->row_nnz[i]>0)
193b2f1ab58SBarry Smith       {
194b2f1ab58SBarry Smith          if (*n_alloc_used > i_here || i_last > n_alloc)
195b2f1ab58SBarry Smith          {
196b2f1ab58SBarry Smith             n_rescue += result->row_nnz[i];
197b2f1ab58SBarry Smith             n_row_rescue++;
198b2f1ab58SBarry Smith             //printf("     Rescue row %d: %d nonzeros\n",i,result->row_nnz[i]);
199b2f1ab58SBarry Smith             //printf("        (New start at %d, old start at %d)\n",
200b2f1ab58SBarry Smith             //             *n_alloc_used, i_here);
201b2f1ab58SBarry Smith          }
202b2f1ab58SBarry Smith 
203b2f1ab58SBarry Smith          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
204b2f1ab58SBarry Smith          else         {  *n_alloc_used += max_row_nnz[i];}
205b2f1ab58SBarry Smith       }
206b2f1ab58SBarry Smith    }
207b2f1ab58SBarry Smith 
208b2f1ab58SBarry Smith #ifdef garbage_debug
209b2f1ab58SBarry Smith    printf(" + %d arrays will have to be rescued:  %d nnz\n",
210b2f1ab58SBarry Smith              n_row_rescue, n_rescue);
211b2f1ab58SBarry Smith #endif
212b2f1ab58SBarry Smith 
213b2f1ab58SBarry Smith    // Allocate rescue arrays
214b2f1ab58SBarry Smith    ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue);
215b2f1ab58SBarry Smith    CHKERRQ(ierr);
216b2f1ab58SBarry Smith    ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue);
217b2f1ab58SBarry Smith    CHKERRQ(ierr);
218b2f1ab58SBarry Smith 
219b2f1ab58SBarry Smith    // Rescue the arrays which need rescuing
220b2f1ab58SBarry Smith    n_row_rescue = 0; n_rescue = 0;
221b2f1ab58SBarry Smith    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
222b2f1ab58SBarry Smith    else
223b2f1ab58SBarry Smith    {
224b2f1ab58SBarry Smith       i = *n_row_alloc_ok - 1;
225b2f1ab58SBarry Smith       *n_alloc_used = (result->icols[i]-result->alloc_icol) +
226b2f1ab58SBarry Smith                        result->row_nnz[i];
227b2f1ab58SBarry Smith    }
228b2f1ab58SBarry Smith 
229b2f1ab58SBarry Smith    for (i=*n_row_alloc_ok; i<nrows; i++)
230b2f1ab58SBarry Smith    {
231b2f1ab58SBarry Smith       i_here = result->icols[i]-result->alloc_icol;
232b2f1ab58SBarry Smith       i_last = i_here + result->row_nnz[i];
233b2f1ab58SBarry Smith       if (result->row_nnz[i]>0)
234b2f1ab58SBarry Smith       {
235b2f1ab58SBarry Smith          if (*n_alloc_used > i_here || i_last > n_alloc)
236b2f1ab58SBarry Smith          {
237b2f1ab58SBarry Smith             //printf("     Rescuing row %d to array[%d..]\n",i,n_rescue);
238b2f1ab58SBarry Smith             memcpy((void*) &icol_rescue[n_rescue],
239b2f1ab58SBarry Smith                    (void*) result->icols[i],
240b2f1ab58SBarry Smith                     result->row_nnz[i] * sizeof(int));
241b2f1ab58SBarry Smith             memcpy((void*) &val_rescue[n_rescue],
242b2f1ab58SBarry Smith                    (void*) result->values[i],
243b2f1ab58SBarry Smith                    result->row_nnz[i] * sizeof(PetscScalar));
244b2f1ab58SBarry Smith             n_rescue += result->row_nnz[i];
245b2f1ab58SBarry Smith             n_row_rescue++;
246b2f1ab58SBarry Smith          }
247b2f1ab58SBarry Smith 
248b2f1ab58SBarry Smith          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
249b2f1ab58SBarry Smith          else         {  *n_alloc_used += max_row_nnz[i];}
250b2f1ab58SBarry Smith       }
251b2f1ab58SBarry Smith    }
252b2f1ab58SBarry Smith 
253b2f1ab58SBarry Smith    //*********************************************************
254b2f1ab58SBarry Smith    // 3. Reallocate and correct administration
255b2f1ab58SBarry Smith 
256b2f1ab58SBarry Smith    if (n_alloc != result->n_alloc_icol)
257b2f1ab58SBarry Smith    {
258b2f1ab58SBarry Smith       n_copy = MIN(n_alloc,result->n_alloc_icol);
259b2f1ab58SBarry Smith 
260b2f1ab58SBarry Smith       // PETSC knows no REALLOC, so we'll REALLOC ourselves.
261b2f1ab58SBarry Smith 
262b2f1ab58SBarry Smith       // Allocate new icol-data, copy old contents
263b2f1ab58SBarry Smith       ierr = PetscMalloc(  n_alloc * sizeof(int),
264b2f1ab58SBarry Smith                            &result->alloc_icol); CHKERRQ(ierr);
265b2f1ab58SBarry Smith       memcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));
266b2f1ab58SBarry Smith 
267b2f1ab58SBarry Smith       // Update administration, Reset pointers to new arrays
268b2f1ab58SBarry Smith       result->n_alloc_icol = n_alloc;
269b2f1ab58SBarry Smith       for (i=0; i<nrows; i++)
270b2f1ab58SBarry Smith       {
271b2f1ab58SBarry Smith           result->icols[i] =  result->alloc_icol +
272b2f1ab58SBarry Smith                   (result->icols[i]  - alloc_icol_old);
273b2f1ab58SBarry Smith           result->values[i] =  result->alloc_val +
274b2f1ab58SBarry Smith                   (result->icols[i]  - result->alloc_icol);
275b2f1ab58SBarry Smith       }
276b2f1ab58SBarry Smith 
277b2f1ab58SBarry Smith       // Delete old array
278b2f1ab58SBarry Smith       ierr = PetscFree(alloc_icol_old);  CHKERRQ(ierr);
279b2f1ab58SBarry Smith 
280b2f1ab58SBarry Smith       // Allocate new value-data, copy old contents
281b2f1ab58SBarry Smith       ierr = PetscMalloc(  n_alloc * sizeof(PetscScalar),
282b2f1ab58SBarry Smith                            &result->alloc_val); CHKERRQ(ierr);
283b2f1ab58SBarry Smith       memcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));
284b2f1ab58SBarry Smith 
285b2f1ab58SBarry Smith       // Update administration, Reset pointers to new arrays
286b2f1ab58SBarry Smith       result->n_alloc_val  = n_alloc;
287b2f1ab58SBarry Smith       for (i=0; i<nrows; i++)
288b2f1ab58SBarry Smith       {
289b2f1ab58SBarry Smith           result->values[i] =  result->alloc_val +
290b2f1ab58SBarry Smith                   (result->icols[i]  - result->alloc_icol);
291b2f1ab58SBarry Smith       }
292b2f1ab58SBarry Smith 
293b2f1ab58SBarry Smith       // Delete old array
294b2f1ab58SBarry Smith       ierr = PetscFree(alloc_val_old);  CHKERRQ(ierr);
295b2f1ab58SBarry Smith    }
296b2f1ab58SBarry Smith 
297b2f1ab58SBarry Smith 
298b2f1ab58SBarry Smith    //*********************************************************
299b2f1ab58SBarry Smith    // 4. Copy all the arrays to their proper places
300b2f1ab58SBarry Smith    n_row_rescue = 0; n_rescue = 0;
301b2f1ab58SBarry Smith    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
302b2f1ab58SBarry Smith    else
303b2f1ab58SBarry Smith    {
304b2f1ab58SBarry Smith       i = *n_row_alloc_ok - 1;
305b2f1ab58SBarry Smith       *n_alloc_used = (result->icols[i]-result->alloc_icol) +
306b2f1ab58SBarry Smith                        result->row_nnz[i];
307b2f1ab58SBarry Smith    }
308b2f1ab58SBarry Smith 
309b2f1ab58SBarry Smith    for (i=*n_row_alloc_ok; i<nrows; i++)
310b2f1ab58SBarry Smith    {
311b2f1ab58SBarry Smith       i_here = result->icols[i]-result->alloc_icol;
312b2f1ab58SBarry Smith       i_last = i_here + result->row_nnz[i];
313b2f1ab58SBarry Smith 
314b2f1ab58SBarry Smith       result->icols[i] = result->alloc_icol + *n_alloc_used;
315b2f1ab58SBarry Smith       result->values[i]= result->alloc_val  + *n_alloc_used;
316b2f1ab58SBarry Smith 
317b2f1ab58SBarry Smith       if (result->row_nnz[i]>0)
318b2f1ab58SBarry Smith       {
319b2f1ab58SBarry Smith          if (*n_alloc_used > i_here || i_last > n_alloc)
320b2f1ab58SBarry Smith          {
321b2f1ab58SBarry Smith             //printf("     Copying rescued array[%d..] to row %d\n",
322b2f1ab58SBarry Smith             //       n_rescue,i);
323b2f1ab58SBarry Smith             //fflush(stdout);
324b2f1ab58SBarry Smith             memcpy((void*) result->icols[i],
325b2f1ab58SBarry Smith                    (void*) &icol_rescue[n_rescue],
326b2f1ab58SBarry Smith                     result->row_nnz[i] * sizeof(int));
327b2f1ab58SBarry Smith             memcpy((void*) result->values[i],
328b2f1ab58SBarry Smith                    (void*) &val_rescue[n_rescue],
329b2f1ab58SBarry Smith                     result->row_nnz[i] * sizeof(PetscScalar));
330b2f1ab58SBarry Smith             n_rescue += result->row_nnz[i];
331b2f1ab58SBarry Smith             n_row_rescue++;
332b2f1ab58SBarry Smith          }
333b2f1ab58SBarry Smith          else
334b2f1ab58SBarry Smith          {
335b2f1ab58SBarry Smith             for (j=0; j<result->row_nnz[i]; j++)
336b2f1ab58SBarry Smith             {
337b2f1ab58SBarry Smith                 result->icols[i][j]   = result->alloc_icol[i_here+j];
338b2f1ab58SBarry Smith                 result->values[i][j]  = result->alloc_val[ i_here+j];
339b2f1ab58SBarry Smith             }
340b2f1ab58SBarry Smith          }
341b2f1ab58SBarry Smith          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
342b2f1ab58SBarry Smith          else         {  *n_alloc_used += max_row_nnz[i];}
343b2f1ab58SBarry Smith       }
344b2f1ab58SBarry Smith    }
345b2f1ab58SBarry Smith 
346b2f1ab58SBarry Smith    // Delete the rescue arrays
347b2f1ab58SBarry Smith    ierr = PetscFree(icol_rescue); CHKERRQ(ierr);
348b2f1ab58SBarry Smith    ierr = PetscFree(val_rescue);  CHKERRQ(ierr);
349b2f1ab58SBarry Smith    *n_row_alloc_ok = i_row;
350b2f1ab58SBarry Smith 
351b2f1ab58SBarry Smith    PetscFunctionReturn(0);
352b2f1ab58SBarry Smith 
353b2f1ab58SBarry Smith }
354b2f1ab58SBarry Smith 
355b2f1ab58SBarry Smith /*
356b2f1ab58SBarry Smith   spbas_incomplete_cholesky:
357b2f1ab58SBarry Smith      incomplete Cholesky decomposition of a square, symmetric,
358b2f1ab58SBarry Smith      positive definite matrix.
359b2f1ab58SBarry Smith 
360b2f1ab58SBarry Smith      In case negative diagonals are encountered, function returns
361b2f1ab58SBarry Smith      NEGATIVE_DIAGONAL. When this happens, call this function again
362b2f1ab58SBarry Smith      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
363b2f1ab58SBarry Smith      droptol
364b2f1ab58SBarry Smith */
365b2f1ab58SBarry Smith #undef __FUNCT__
366b2f1ab58SBarry Smith #define __FUNCT__ "spbas_incomplete_cholesky"
367b2f1ab58SBarry Smith PetscErrorCode spbas_incomplete_cholesky(
368b2f1ab58SBarry Smith         Mat A, const PetscInt *rip, const PetscInt *riip,
369b2f1ab58SBarry Smith         spbas_matrix pattern,
370*d36aa34eSBarry Smith         PetscReal droptol, PetscReal epsdiag_in,
371b2f1ab58SBarry Smith         spbas_matrix * matrix_L)
372b2f1ab58SBarry Smith {
373b2f1ab58SBarry Smith 
374b2f1ab58SBarry Smith #undef cholesky_debug
375b2f1ab58SBarry Smith 
376b2f1ab58SBarry Smith #ifdef cholesky_debug
377b2f1ab58SBarry Smith    PetscInt idebug=0;
378b2f1ab58SBarry Smith    const char * fmt= "   (%d,%d) -> %12.8e\n";
379b2f1ab58SBarry Smith    PetscScalar tmp;
380b2f1ab58SBarry Smith #endif
381b2f1ab58SBarry Smith 
382b2f1ab58SBarry Smith    PetscInt jL;
383b2f1ab58SBarry Smith    Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
384b2f1ab58SBarry Smith    PetscInt       *ai=a->i,*aj=a->j;
385b2f1ab58SBarry Smith    MatScalar      *aa=a->a;
386b2f1ab58SBarry Smith    PetscInt nrows, ncols;
387b2f1ab58SBarry Smith    PetscInt *max_row_nnz;
388b2f1ab58SBarry Smith    PetscErrorCode ierr;
389b2f1ab58SBarry Smith    spbas_matrix retval;
390b2f1ab58SBarry Smith    PetscScalar * diag;
391b2f1ab58SBarry Smith    PetscScalar * val;
392b2f1ab58SBarry Smith    PetscScalar * lvec;
393b2f1ab58SBarry Smith    PetscScalar epsdiag;
394b2f1ab58SBarry Smith    PetscInt i,j,k;
395b2f1ab58SBarry Smith    const PetscTruth do_values = PETSC_TRUE;
396b2f1ab58SBarry Smith    PetscInt * r1_icol; PetscScalar *r1_val;
397b2f1ab58SBarry Smith    PetscInt * r_icol; PetscInt r_nnz; PetscScalar *r_val;
398b2f1ab58SBarry Smith    PetscInt * A_icol; PetscInt A_nnz; PetscScalar *A_val;
399b2f1ab58SBarry Smith    PetscInt * p_icol; PetscInt p_nnz;
400b2f1ab58SBarry Smith    PetscInt n_row_alloc_ok = 0;  // number of rows which have been stored
401b2f1ab58SBarry Smith                             // correctly in the matrix
402b2f1ab58SBarry Smith    PetscInt n_alloc_used = 0;    // part of result->icols and result->values
403b2f1ab58SBarry Smith                             // which is currently being used
404b2f1ab58SBarry Smith    PetscFunctionBegin;
405b2f1ab58SBarry Smith 
406b2f1ab58SBarry Smith    // Convert the Manteuffel shift from 'fraction of average diagonal' to
407b2f1ab58SBarry Smith    // dimensioned value
408b2f1ab58SBarry Smith    ierr = MatGetSize(A, &nrows, &ncols); CHKERRQ(ierr);
409b2f1ab58SBarry Smith    ierr = MatGetTrace(A, &epsdiag); CHKERRQ(ierr);
410b2f1ab58SBarry Smith    epsdiag *= epsdiag_in / nrows;
411b2f1ab58SBarry Smith    printf("   Dimensioned Manteuffel shift=%e\n", epsdiag);
412b2f1ab58SBarry Smith    printf("   Drop tolerance              =%e\n",droptol);
413b2f1ab58SBarry Smith 
414b2f1ab58SBarry Smith    if (droptol<1e-10) {droptol=1e-10;}
415b2f1ab58SBarry Smith 
416b2f1ab58SBarry Smith    // Check dimensions
417b2f1ab58SBarry Smith    if ( (nrows != pattern.nrows) ||
418b2f1ab58SBarry Smith         (ncols != pattern.ncols) ||
419b2f1ab58SBarry Smith         (ncols != nrows) )
420b2f1ab58SBarry Smith    {
421b2f1ab58SBarry Smith       SETERRQ( PETSC_ERR_ARG_INCOMP,
422b2f1ab58SBarry Smith                "Dimension error in spbas_incomplete_cholesky\n");
423b2f1ab58SBarry Smith    }
424b2f1ab58SBarry Smith 
425b2f1ab58SBarry Smith    // Set overall values
426b2f1ab58SBarry Smith    retval.nrows = nrows;
427b2f1ab58SBarry Smith    retval.ncols = nrows;
428b2f1ab58SBarry Smith    retval.nnz   = pattern.nnz/10;
429b2f1ab58SBarry Smith    retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
430b2f1ab58SBarry Smith    retval.block_data = PETSC_TRUE;
431b2f1ab58SBarry Smith 
432b2f1ab58SBarry Smith    // Allocate sparseness pattern
433b2f1ab58SBarry Smith    ierr =  spbas_allocate_pattern(&retval, do_values); CHKERRQ(ierr);
434b2f1ab58SBarry Smith    // Initial allocation of data
435b2f1ab58SBarry Smith    memset((void*) retval.row_nnz, 0, nrows*sizeof(int));
436b2f1ab58SBarry Smith    ierr =  spbas_allocate_data(&retval); CHKERRQ(ierr);
437b2f1ab58SBarry Smith    retval.nnz   = 0;
438b2f1ab58SBarry Smith 
439b2f1ab58SBarry Smith    // Allocate work arrays
440b2f1ab58SBarry Smith    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag); CHKERRQ(ierr);
441b2f1ab58SBarry Smith    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);  CHKERRQ(ierr);
442b2f1ab58SBarry Smith    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec); CHKERRQ(ierr);
443b2f1ab58SBarry Smith    ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);  CHKERRQ(ierr);
444b2f1ab58SBarry Smith    if (!(diag && val && lvec && max_row_nnz))
445b2f1ab58SBarry Smith    {
446b2f1ab58SBarry Smith       SETERRQ( PETSC_ERR_MEM,
447b2f1ab58SBarry Smith               "Allocation error in spbas_incomplete_cholesky\n");
448b2f1ab58SBarry Smith    }
449b2f1ab58SBarry Smith 
450b2f1ab58SBarry Smith    memset((void*) val, 0, nrows*sizeof(PetscScalar));
451b2f1ab58SBarry Smith    memset((void*) lvec, 0, nrows*sizeof(PetscScalar));
452b2f1ab58SBarry Smith    memset((void*) max_row_nnz, 0, nrows*sizeof(int));
453b2f1ab58SBarry Smith 
454b2f1ab58SBarry Smith    // Check correct format of sparseness pattern
455b2f1ab58SBarry Smith    if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS)
456b2f1ab58SBarry Smith    {
457b2f1ab58SBarry Smith       SETERRQ( PETSC_ERR_SUP_SYS,
458b2f1ab58SBarry Smith                "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
459b2f1ab58SBarry Smith    }
460b2f1ab58SBarry Smith 
461b2f1ab58SBarry Smith    // Count the nonzeros on transpose of pattern
462b2f1ab58SBarry Smith    for (i = 0; i<nrows; i++)
463b2f1ab58SBarry Smith    {
464b2f1ab58SBarry Smith       p_nnz  = pattern.row_nnz[i];
465b2f1ab58SBarry Smith       p_icol = pattern.icols[i];
466b2f1ab58SBarry Smith       for (j=0; j<p_nnz; j++)
467b2f1ab58SBarry Smith       {
468b2f1ab58SBarry Smith          max_row_nnz[i+p_icol[j]]++;
469b2f1ab58SBarry Smith       }
470b2f1ab58SBarry Smith    }
471b2f1ab58SBarry Smith 
472b2f1ab58SBarry Smith    // Calculate rows of L
473b2f1ab58SBarry Smith    for (i = 0; i<nrows; i++)
474b2f1ab58SBarry Smith    {
475b2f1ab58SBarry Smith       p_nnz  = pattern.row_nnz[i];
476b2f1ab58SBarry Smith       p_icol = pattern.icols[i];
477b2f1ab58SBarry Smith 
478b2f1ab58SBarry Smith       r_nnz  = retval.row_nnz[i];
479b2f1ab58SBarry Smith       r_icol = retval.icols[i];
480b2f1ab58SBarry Smith       r_val  = retval.values[i];
481b2f1ab58SBarry Smith       A_nnz  = ai[rip[i]+1] - ai[rip[i]];
482b2f1ab58SBarry Smith       A_icol = &aj[ai[rip[i]]];
483b2f1ab58SBarry Smith       A_val  = &aa[ai[rip[i]]];
484b2f1ab58SBarry Smith 
485b2f1ab58SBarry Smith       // Calculate  val += A(i,i:n)';
486b2f1ab58SBarry Smith       for (j=0; j<A_nnz; j++)
487b2f1ab58SBarry Smith       {
488b2f1ab58SBarry Smith          k = riip[A_icol[j]];
489b2f1ab58SBarry Smith          if (k>=i) { val[k] = A_val[j]; }
490b2f1ab58SBarry Smith       }
491b2f1ab58SBarry Smith 
492b2f1ab58SBarry Smith       // Add regularization
493b2f1ab58SBarry Smith       val[i] += epsdiag;
494b2f1ab58SBarry Smith 
495b2f1ab58SBarry Smith       // Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
496b2f1ab58SBarry Smith       //           val(i) = A(i,i) - L(0:i-1,i)' * lvec
497b2f1ab58SBarry Smith       for ( j=0; j<r_nnz; j++)
498b2f1ab58SBarry Smith       {
499b2f1ab58SBarry Smith          k           = r_icol[j];
500b2f1ab58SBarry Smith          lvec[k]     = diag[k] * r_val[j];
501b2f1ab58SBarry Smith          val[i]     -= r_val[j] * lvec[k];
502b2f1ab58SBarry Smith       }
503b2f1ab58SBarry Smith 
504b2f1ab58SBarry Smith       // Calculate the new diagonal
505b2f1ab58SBarry Smith       diag[i] = val[i];
506*d36aa34eSBarry Smith       if (PetscAbsScalar(diag[i])<droptol)
507b2f1ab58SBarry Smith       {
508b2f1ab58SBarry Smith          printf("Error in spbas_incomplete_cholesky:\n");
509b2f1ab58SBarry Smith          printf("Negative diagonal in row %d\n",i+1);
510b2f1ab58SBarry Smith 
511b2f1ab58SBarry Smith          // Delete the whole matrix at once.
512b2f1ab58SBarry Smith          ierr = spbas_delete(retval);
513b2f1ab58SBarry Smith          return NEGATIVE_DIAGONAL;
514b2f1ab58SBarry Smith       }
515b2f1ab58SBarry Smith 
516b2f1ab58SBarry Smith #ifdef cholesky_debug
517b2f1ab58SBarry Smith       if (idebug>=3)
518b2f1ab58SBarry Smith       {
519b2f1ab58SBarry Smith          printf("diagD * L (%d,:)=\n",i+1);
520b2f1ab58SBarry Smith          for (j=0; j<r_nnz; j++)
521b2f1ab58SBarry Smith          {
522b2f1ab58SBarry Smith             printf(fmt,r_icol[j]+1,i+1,lvec[r_icol[j]]);
523b2f1ab58SBarry Smith          }
524b2f1ab58SBarry Smith          printf("\n\n");
525b2f1ab58SBarry Smith       }
526b2f1ab58SBarry Smith 
527b2f1ab58SBarry Smith       if (idebug>=1) { printf("Diag = %12.8e\n\n\n",diag[i]);}
528b2f1ab58SBarry Smith 
529b2f1ab58SBarry Smith       if (idebug>=2)
530b2f1ab58SBarry Smith       {
531b2f1ab58SBarry Smith          printf("L^T * diagD * L=\n");
532b2f1ab58SBarry Smith          tmp = 0.0;
533b2f1ab58SBarry Smith          for ( j=0; j<r_nnz; j++)
534b2f1ab58SBarry Smith          {
535b2f1ab58SBarry Smith             k     =  r_icol[j];
536b2f1ab58SBarry Smith             tmp   += r_val[j] * lvec[k];
537b2f1ab58SBarry Smith          }
538b2f1ab58SBarry Smith          printf(fmt, i+1,i+1,tmp);
539b2f1ab58SBarry Smith       }
540b2f1ab58SBarry Smith #endif
541b2f1ab58SBarry Smith 
542b2f1ab58SBarry Smith       // If necessary, allocate arrays
543b2f1ab58SBarry Smith       if (r_nnz==0)
544b2f1ab58SBarry Smith       {
545b2f1ab58SBarry Smith          ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
546b2f1ab58SBarry Smith          if (ierr == PETSC_ERR_MEM)
547b2f1ab58SBarry Smith          {
548b2f1ab58SBarry Smith             ierr = spbas_cholesky_garbage_collect( &retval,  i, &n_row_alloc_ok,
549b2f1ab58SBarry Smith                                &n_alloc_used, max_row_nnz);
550b2f1ab58SBarry Smith             CHKERRQ(ierr);
551b2f1ab58SBarry Smith             ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
552b2f1ab58SBarry Smith          }
553b2f1ab58SBarry Smith          CHKERRQ(ierr);
554b2f1ab58SBarry Smith          r_icol = retval.icols[i];
555b2f1ab58SBarry Smith          r_val  = retval.values[i];
556b2f1ab58SBarry Smith       }
557b2f1ab58SBarry Smith 
558b2f1ab58SBarry Smith       // Now, fill in
559b2f1ab58SBarry Smith       r_icol[r_nnz] = i;
560b2f1ab58SBarry Smith       r_val [r_nnz] = 1.0;
561b2f1ab58SBarry Smith       r_nnz++;
562b2f1ab58SBarry Smith       retval.row_nnz[i]++;
563b2f1ab58SBarry Smith 
564b2f1ab58SBarry Smith       retval.nnz += r_nnz;
565b2f1ab58SBarry Smith 
566b2f1ab58SBarry Smith       // Calculate
567b2f1ab58SBarry Smith       //   val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i)
568b2f1ab58SBarry Smith       for (j=1; j<p_nnz; j++)
569b2f1ab58SBarry Smith       {
570b2f1ab58SBarry Smith          k = i+p_icol[j];
571b2f1ab58SBarry Smith          r1_icol = retval.icols[k];
572b2f1ab58SBarry Smith          r1_val  = retval.values[k];
573b2f1ab58SBarry Smith #ifdef cholesky_debug
574b2f1ab58SBarry Smith          tmp = 0.0;
575b2f1ab58SBarry Smith #endif
576b2f1ab58SBarry Smith          for (jL=0; jL<retval.row_nnz[k]; jL++)
577b2f1ab58SBarry Smith          {
578b2f1ab58SBarry Smith             val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
579b2f1ab58SBarry Smith #ifdef cholesky_debug
580b2f1ab58SBarry Smith             tmp    += r1_val[jL] * lvec[r1_icol[jL]];
581b2f1ab58SBarry Smith #endif
582b2f1ab58SBarry Smith          }
583b2f1ab58SBarry Smith #ifdef cholesky_debug
584b2f1ab58SBarry Smith          if (idebug>=2 && tmp!=0) {printf(fmt, k+1,i+1,tmp);}
585b2f1ab58SBarry Smith #endif
586b2f1ab58SBarry Smith          val[k] /= diag[i];
587b2f1ab58SBarry Smith 
588*d36aa34eSBarry Smith          if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol)
589b2f1ab58SBarry Smith          {
590b2f1ab58SBarry Smith             // If necessary, allocate arrays
591b2f1ab58SBarry Smith             if (retval.row_nnz[k]==0)
592b2f1ab58SBarry Smith             {
593b2f1ab58SBarry Smith                ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k],
594b2f1ab58SBarry Smith                             &n_alloc_used);
595b2f1ab58SBarry Smith                if (ierr == PETSC_ERR_MEM)
596b2f1ab58SBarry Smith                {
597b2f1ab58SBarry Smith                   ierr = spbas_cholesky_garbage_collect(
598b2f1ab58SBarry Smith                                      &retval,  i, &n_row_alloc_ok,
599b2f1ab58SBarry Smith                                      &n_alloc_used, max_row_nnz);
600b2f1ab58SBarry Smith                   CHKERRQ(ierr);
601b2f1ab58SBarry Smith                   ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k],
602b2f1ab58SBarry Smith                             &n_alloc_used);
603b2f1ab58SBarry Smith                   r_icol = retval.icols[i];
604b2f1ab58SBarry Smith                   r_val  = retval.values[i];
605b2f1ab58SBarry Smith                }
606b2f1ab58SBarry Smith                CHKERRQ(ierr);
607b2f1ab58SBarry Smith             }
608b2f1ab58SBarry Smith 
609b2f1ab58SBarry Smith             // Now, fill in
610b2f1ab58SBarry Smith             retval.icols[k][retval.row_nnz[k]] = i;
611b2f1ab58SBarry Smith             retval.values[k][retval.row_nnz[k]] = val[k];
612b2f1ab58SBarry Smith             retval.row_nnz[k]++;
613b2f1ab58SBarry Smith          }
614b2f1ab58SBarry Smith          val[k] = 0;
615b2f1ab58SBarry Smith       }
616b2f1ab58SBarry Smith #ifdef cholesky_debug
617b2f1ab58SBarry Smith       if (idebug>=2) {printf("\n\n");}
618b2f1ab58SBarry Smith #endif
619b2f1ab58SBarry Smith 
620b2f1ab58SBarry Smith       // Erase the values used in the work arrays
621b2f1ab58SBarry Smith       for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
622b2f1ab58SBarry Smith 
623b2f1ab58SBarry Smith #ifdef cholesky_debug
624b2f1ab58SBarry Smith       if (idebug>=2)
625b2f1ab58SBarry Smith       {
626b2f1ab58SBarry Smith          printf("new L=\n");
627b2f1ab58SBarry Smith          for (jL=i; jL<nrows; jL++)
628b2f1ab58SBarry Smith          {
629b2f1ab58SBarry Smith              j = retval.row_nnz[jL]-1;
630b2f1ab58SBarry Smith              if (j>=0)
631b2f1ab58SBarry Smith              {
632b2f1ab58SBarry Smith                 k = retval.icols[jL][j];
633b2f1ab58SBarry Smith                 if (k==i) printf(fmt, jL+1,i+1,retval.values[jL][j]);
634b2f1ab58SBarry Smith              }
635b2f1ab58SBarry Smith          }
636b2f1ab58SBarry Smith          printf("\n\n");
637b2f1ab58SBarry Smith       }
638b2f1ab58SBarry Smith #endif
639b2f1ab58SBarry Smith    }
640b2f1ab58SBarry Smith 
641b2f1ab58SBarry Smith    ierr=PetscFree(lvec); CHKERRQ(ierr); ierr=PetscFree(val); CHKERRQ(ierr);
642b2f1ab58SBarry Smith 
643b2f1ab58SBarry Smith    ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok,
644b2f1ab58SBarry Smith                                      &n_alloc_used, max_row_nnz);
645b2f1ab58SBarry Smith    ierr=PetscFree(max_row_nnz); CHKERRQ(ierr);
646b2f1ab58SBarry Smith 
647b2f1ab58SBarry Smith    // Place the inverse of the diagonals in the matrix
648b2f1ab58SBarry Smith    for (i=0; i<nrows; i++)
649b2f1ab58SBarry Smith    {
650b2f1ab58SBarry Smith       r_nnz = retval.row_nnz[i];
651b2f1ab58SBarry Smith       retval.values[i][r_nnz-1] = 1.0 / diag[i];
652b2f1ab58SBarry Smith       for (j=0; j<r_nnz-1; j++)
653b2f1ab58SBarry Smith       {
654b2f1ab58SBarry Smith          retval.values[i][j] *= -1;
655b2f1ab58SBarry Smith       }
656b2f1ab58SBarry Smith    }
657b2f1ab58SBarry Smith    ierr=PetscFree(diag); CHKERRQ(ierr);
658b2f1ab58SBarry Smith 
659b2f1ab58SBarry Smith 
660b2f1ab58SBarry Smith    // Return successfully
661b2f1ab58SBarry Smith    *matrix_L = retval;
662b2f1ab58SBarry Smith 
663b2f1ab58SBarry Smith #ifdef cholesky_debug
664b2f1ab58SBarry Smith    for (i = 0; i<nrows; i++)
665b2f1ab58SBarry Smith    {
666b2f1ab58SBarry Smith       printf("Row %d\n",i+1);
667b2f1ab58SBarry Smith       for (j=0; j<retval.row_nnz[i]; j++)
668b2f1ab58SBarry Smith       {
669b2f1ab58SBarry Smith          printf(fmt, i+1,retval.icols[i][j]+1,retval.values[i][j]);
670b2f1ab58SBarry Smith       }
671b2f1ab58SBarry Smith    }
672b2f1ab58SBarry Smith #endif
673b2f1ab58SBarry Smith    PetscFunctionReturn(0);
674b2f1ab58SBarry Smith }
675b2f1ab58SBarry Smith 
676