xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 503c0ea9b45bcfbcebbb1ea5341243bbc69f0bea)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
9 {
10   PetscFunctionBegin;
11   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
12   retval.values[k] = &retval.alloc_val[*n_alloc_used];
13   *n_alloc_used   += r_nnz;
14   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_FALSE);
15   PetscFunctionReturn(PETSC_TRUE);
16 }
17 
18 /*
19   spbas_cholesky_garbage_collect:
20      move the rows which have been calculated so far, as well as
21      those currently under construction, to the front of the
22      array, while putting them in the proper order.
23      When it seems necessary, enlarge the current arrays.
24 
25      NB: re-allocation is being simulated using
26          PetscMalloc, memcpy, PetscFree, because
27          PetscRealloc does not seem to exist.
28 
29 */
30 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
31                                                                                     Only the storage, not the contents of this matrix is changed in this function */
32                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
33                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
34                                                                                     places in the arrays: they need not be moved any more */
35                                               PetscInt     *n_alloc_used,   /* I/O:  */
36                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
37 {
38 /* PSEUDO-CODE:
39   1. Choose the appropriate size for the arrays
40   2. Rescue the arrays which would be lost during garbage collection
41   3. Reallocate and correct administration
42   4. Move all arrays so that they are in proper order */
43 
44   PetscInt        i,j;
45   PetscInt        nrows         = result->nrows;
46   PetscInt        n_alloc_ok    =0;
47   PetscInt        n_alloc_ok_max=0;
48   PetscInt        need_already  = 0;
49   PetscInt        n_rows_ahead  =0;
50   PetscInt        max_need_extra= 0;
51   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
52   PetscInt        n_alloc_now     = result->n_alloc_icol;
53   PetscInt        *alloc_icol_old = result->alloc_icol;
54   PetscScalar     *alloc_val_old  = result->alloc_val;
55   PetscInt        *icol_rescue;
56   PetscScalar     *val_rescue;
57   PetscInt        n_rescue;
58   PetscInt        n_row_rescue;
59   PetscInt        i_here, i_last, n_copy;
60   const PetscReal xtra_perc = 20;
61 
62   PetscFunctionBegin;
63   /*********************************************************
64   1. Choose appropriate array size
65   Count number of rows and memory usage which is already final */
66   for (i=0; i<i_row; i++)  {
67     n_alloc_ok     += result->row_nnz[i];
68     n_alloc_ok_max += max_row_nnz[i];
69   }
70 
71   /* Count currently needed memory usage and future memory requirements
72     (max, predicted)*/
73   for (i=i_row; i<nrows; i++) {
74     if (!result->row_nnz[i]) {
75       max_need_extra += max_row_nnz[i];
76     } else {
77       need_already += max_row_nnz[i];
78       n_rows_ahead++;
79     }
80   }
81 
82   /* Make maximal and realistic memory requirement estimates */
83   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
84   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
85 
86   /* Choose array sizes */
87   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
88   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
89   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) n_alloc = n_alloc_max;
90   else n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0));
91 
92   /* If new estimate is less than what we already have,
93     don't reallocate, just garbage-collect */
94   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
95     n_alloc = result->n_alloc_icol;
96   }
97 
98   /* Motivate dimension choice */
99   PetscCall(PetscInfo(NULL,"   Allocating %" PetscInt_FMT " nonzeros: ",n_alloc));
100   if (n_alloc_max == n_alloc_est) {
101     PetscCall(PetscInfo(NULL,"this is the correct size\n"));
102   } else if (n_alloc_now >= n_alloc_est) {
103     PetscCall(PetscInfo(NULL,"the current size, which seems enough\n"));
104   } else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) {
105     PetscCall(PetscInfo(NULL,"the maximum estimate\n"));
106   } else {
107     PetscCall(PetscInfo(NULL,"%6.2f %% more than the estimate\n",(double)xtra_perc));
108   }
109 
110   /**********************************************************
111   2. Rescue arrays which would be lost
112   Count how many rows and nonzeros will have to be rescued
113   when moving all arrays in place */
114   n_row_rescue = 0; n_rescue = 0;
115   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
116   else {
117     i = *n_row_alloc_ok - 1;
118 
119     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
120   }
121 
122   for (i=*n_row_alloc_ok; i<nrows; i++) {
123     i_here = result->icols[i]-result->alloc_icol;
124     i_last = i_here + result->row_nnz[i];
125     if (result->row_nnz[i]>0) {
126       if (*n_alloc_used > i_here || i_last > n_alloc) {
127         n_rescue += result->row_nnz[i];
128         n_row_rescue++;
129       }
130 
131       if (i<i_row) *n_alloc_used += result->row_nnz[i];
132       else         *n_alloc_used += max_row_nnz[i];
133     }
134   }
135 
136   /* Allocate rescue arrays */
137   PetscCall(PetscMalloc1(n_rescue, &icol_rescue));
138   PetscCall(PetscMalloc1(n_rescue, &val_rescue));
139 
140   /* Rescue the arrays which need rescuing */
141   n_row_rescue = 0; n_rescue = 0;
142   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
143   else {
144     i             = *n_row_alloc_ok - 1;
145     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
146   }
147 
148   for (i=*n_row_alloc_ok; i<nrows; i++) {
149     i_here = result->icols[i]-result->alloc_icol;
150     i_last = i_here + result->row_nnz[i];
151     if (result->row_nnz[i]>0) {
152       if (*n_alloc_used > i_here || i_last > n_alloc) {
153         PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]));
154         PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]));
155         n_rescue += result->row_nnz[i];
156         n_row_rescue++;
157       }
158 
159       if (i<i_row) *n_alloc_used += result->row_nnz[i];
160       else         *n_alloc_used += max_row_nnz[i];
161     }
162   }
163 
164   /**********************************************************
165   3. Reallocate and correct administration */
166 
167   if (n_alloc != result->n_alloc_icol) {
168     n_copy = PetscMin(n_alloc,result->n_alloc_icol);
169 
170     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
171 
172         Allocate new icol-data, copy old contents */
173     PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol));
174     PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy));
175 
176     /* Update administration, Reset pointers to new arrays  */
177     result->n_alloc_icol = n_alloc;
178     for (i=0; i<nrows; i++) {
179       result->icols[i]  =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
180       result->values[i] =  result->alloc_val  + (result->icols[i]  - result->alloc_icol);
181     }
182 
183     /* Delete old array */
184     PetscCall(PetscFree(alloc_icol_old));
185 
186     /* Allocate new value-data, copy old contents */
187     PetscCall(PetscMalloc1(n_alloc, &result->alloc_val));
188     PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy));
189 
190     /* Update administration, Reset pointers to new arrays  */
191     result->n_alloc_val = n_alloc;
192     for (i=0; i<nrows; i++) {
193       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
194     }
195 
196     /* Delete old array */
197     PetscCall(PetscFree(alloc_val_old));
198   }
199 
200   /*********************************************************
201   4. Copy all the arrays to their proper places */
202   n_row_rescue = 0; n_rescue = 0;
203   if (*n_row_alloc_ok==0) *n_alloc_used = 0;
204   else {
205     i = *n_row_alloc_ok - 1;
206 
207     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
208   }
209 
210   for (i=*n_row_alloc_ok; i<nrows; i++) {
211     i_here = result->icols[i]-result->alloc_icol;
212     i_last = i_here + result->row_nnz[i];
213 
214     result->icols[i] = result->alloc_icol + *n_alloc_used;
215     result->values[i]= result->alloc_val  + *n_alloc_used;
216 
217     if (result->row_nnz[i]>0) {
218       if (*n_alloc_used > i_here || i_last > n_alloc) {
219         PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]));
220         PetscCall(PetscArraycpy(result->values[i],&val_rescue[n_rescue],result->row_nnz[i]));
221 
222         n_rescue += result->row_nnz[i];
223         n_row_rescue++;
224       } else {
225         for (j=0; j<result->row_nnz[i]; j++) {
226           result->icols[i][j]  = result->alloc_icol[i_here+j];
227           result->values[i][j] = result->alloc_val[i_here+j];
228         }
229       }
230       if (i<i_row) *n_alloc_used += result->row_nnz[i];
231       else         *n_alloc_used += max_row_nnz[i];
232     }
233   }
234 
235   /* Delete the rescue arrays */
236   PetscCall(PetscFree(icol_rescue));
237   PetscCall(PetscFree(val_rescue));
238 
239   *n_row_alloc_ok = i_row;
240   PetscFunctionReturn(0);
241 }
242 
243 /*
244   spbas_incomplete_cholesky:
245      incomplete Cholesky decomposition of a square, symmetric,
246      positive definite matrix.
247 
248      In case negative diagonals are encountered, function returns
249      NEGATIVE_DIAGONAL. When this happens, call this function again
250      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
251      droptol
252 */
253 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L,PetscBool *success)
254 {
255   PetscInt        jL;
256   Mat_SeqAIJ      *a =(Mat_SeqAIJ*)A->data;
257   PetscInt        *ai=a->i,*aj=a->j;
258   MatScalar       *aa=a->a;
259   PetscInt        nrows, ncols;
260   PetscInt        *max_row_nnz;
261   spbas_matrix    retval;
262   PetscScalar     *diag;
263   PetscScalar     *val;
264   PetscScalar     *lvec;
265   PetscScalar     epsdiag;
266   PetscInt        i,j,k;
267   const PetscBool do_values = PETSC_TRUE;
268   PetscInt        *r1_icol;
269   PetscScalar     *r1_val;
270   PetscInt        *r_icol;
271   PetscInt        r_nnz;
272   PetscScalar     *r_val;
273   PetscInt        *A_icol;
274   PetscInt        A_nnz;
275   PetscScalar     *A_val;
276   PetscInt        *p_icol;
277   PetscInt        p_nnz;
278   PetscInt        n_row_alloc_ok = 0;   /* number of rows which have been stored   correctly in the matrix */
279   PetscInt        n_alloc_used   = 0;   /* part of result->icols and result->values   which is currently being used */
280 
281   PetscFunctionBegin;
282   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
283   PetscCall(MatGetSize(A, &nrows, &ncols));
284   PetscCall(MatGetTrace(A, &epsdiag));
285 
286   epsdiag *= epsdiag_in / nrows;
287 
288   PetscCall(PetscInfo(NULL,"   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag),(double)droptol));
289 
290   if (droptol<1e-10) droptol=1e-10;
291 
292   retval.nrows        = nrows;
293   retval.ncols        = nrows;
294   retval.nnz          = pattern.nnz/10;
295   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
296   retval.block_data   = PETSC_TRUE;
297 
298   PetscCall(spbas_allocate_pattern(&retval, do_values));
299   PetscCall(PetscArrayzero(retval.row_nnz, nrows));
300   PetscCall(spbas_allocate_data(&retval));
301   retval.nnz = 0;
302 
303   PetscCall(PetscMalloc1(nrows, &diag));
304   PetscCall(PetscCalloc1(nrows, &val));
305   PetscCall(PetscCalloc1(nrows, &lvec));
306   PetscCall(PetscCalloc1(nrows, &max_row_nnz));
307 
308   /* Count the nonzeros on transpose of pattern */
309   for (i = 0; i<nrows; i++)  {
310     p_nnz  = pattern.row_nnz[i];
311     p_icol = pattern.icols[i];
312     for (j=0; j<p_nnz; j++)  {
313       max_row_nnz[i+p_icol[j]]++;
314     }
315   }
316 
317   /* Calculate rows of L */
318   for (i = 0; i<nrows; i++)  {
319     p_nnz  = pattern.row_nnz[i];
320     p_icol = pattern.icols[i];
321 
322     r_nnz  = retval.row_nnz[i];
323     r_icol = retval.icols[i];
324     r_val  = retval.values[i];
325     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
326     A_icol = &aj[ai[rip[i]]];
327     A_val  = &aa[ai[rip[i]]];
328 
329     /* Calculate  val += A(i,i:n)'; */
330     for (j=0; j<A_nnz; j++) {
331       k = riip[A_icol[j]];
332       if (k>=i) val[k] = A_val[j];
333     }
334 
335     /*  Add regularization */
336     val[i] += epsdiag;
337 
338     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
339         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
340     for (j=0; j<r_nnz; j++)  {
341       k       = r_icol[j];
342       lvec[k] = diag[k] * r_val[j];
343       val[i] -= r_val[j] * lvec[k];
344     }
345 
346     /* Calculate the new diagonal */
347     diag[i] = val[i];
348     if (PetscRealPart(diag[i])<droptol) {
349       PetscCall(PetscInfo(NULL,"Error in spbas_incomplete_cholesky:\n"));
350       PetscCall(PetscInfo(NULL,"Negative diagonal in row %" PetscInt_FMT "\n",i+1));
351 
352       /* Delete the whole matrix at once. */
353       PetscCall(spbas_delete(retval));
354       *success = PETSC_FALSE;
355       PetscFunctionReturn(0);
356     }
357 
358     /* If necessary, allocate arrays */
359     if (r_nnz==0) {
360       PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
361       PetscCheck(success,PETSC_COMM_SELF,PETSC_ERR_MEM,"spbas_cholesky_row_alloc() failed");
362       r_icol = retval.icols[i];
363       r_val  = retval.values[i];
364     }
365 
366     /* Now, fill in */
367     r_icol[r_nnz] = i;
368     r_val [r_nnz] = 1.0;
369     r_nnz++;
370     retval.row_nnz[i]++;
371 
372     retval.nnz += r_nnz;
373 
374     /* Calculate
375         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
376     for (j=1; j<p_nnz; j++)  {
377       k       = i+p_icol[j];
378       r1_icol = retval.icols[k];
379       r1_val  = retval.values[k];
380       for (jL=0; jL<retval.row_nnz[k]; jL++) {
381         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
382       }
383       val[k] /= diag[i];
384 
385       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
386         /* If necessary, allocate arrays */
387         if (!retval.row_nnz[k]) {
388           PetscBool flag,success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
389           if (!success) {
390             PetscCall(spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
391             flag   = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
392             PetscCheck(flag,PETSC_COMM_SELF,PETSC_ERR_MEM,"Allocation in spbas_cholesky_row_alloc() failed");
393             r_icol = retval.icols[i];
394           }
395         }
396 
397         retval.icols[k][retval.row_nnz[k]]  = i;
398         retval.values[k][retval.row_nnz[k]] = val[k];
399         retval.row_nnz[k]++;
400       }
401       val[k] = 0;
402     }
403 
404     /* Erase the values used in the work arrays */
405     for (j=0; j<r_nnz; j++) lvec[r_icol[j]] = 0;
406   }
407 
408   PetscCall(PetscFree(lvec));
409   PetscCall(PetscFree(val));
410 
411   PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
412   PetscCall(PetscFree(max_row_nnz));
413 
414   /* Place the inverse of the diagonals in the matrix */
415   for (i=0; i<nrows; i++) {
416     r_nnz = retval.row_nnz[i];
417 
418     retval.values[i][r_nnz-1] = 1.0 / diag[i];
419     for (j=0; j<r_nnz-1; j++) {
420       retval.values[i][j] *= -1;
421     }
422   }
423   PetscCall(PetscFree(diag));
424   *matrix_L = retval;
425   *success  = PETSC_TRUE;
426   PetscFunctionReturn(0);
427 }
428