xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 3ea99036a5fedea4d39e7e77471d0ab500c249d7)
1 #ifndef SPBAS_CHOLESKY_H
2 #define SPBAS_CHOLESKY_H
3 /*
4    spbas_cholesky_row_alloc:
5       in the data arrays, find a place where another row may be stored.
6       Return PETSC_ERR_MEM if there is insufficient space to store the
7       row, so garbage collection and/or re-allocation may be done.
8 */
9 static PetscBool spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz, PetscInt *n_alloc_used)
10 {
11   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
12   retval.values[k] = &retval.alloc_val[*n_alloc_used];
13   *n_alloc_used += r_nnz;
14   return (*n_alloc_used > retval.n_alloc_icol) ? PETSC_FALSE : PETSC_TRUE;
15 }
16 
17 /*
18   spbas_cholesky_garbage_collect:
19      move the rows which have been calculated so far, as well as
20      those currently under construction, to the front of the
21      array, while putting them in the proper order.
22      When it seems necessary, enlarge the current arrays.
23 
24      NB: re-allocation is being simulated using
25          PetscMalloc, memcpy, PetscFree, because
26          PetscRealloc does not seem to exist.
27 
28 */
29 static PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
30                                                                                     Only the storage, not the contents of this matrix is changed in this function */
31                                                      PetscInt      i_row,          /* I  : Number of rows for which the final contents are known */
32                                                      PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
33                                                                                     places in the arrays: they need not be moved any more */
34                                                      PetscInt     *n_alloc_used,   /* I/O:  */
35                                                      PetscInt     *max_row_nnz)        /* I  : Over-estimate of the number of nonzeros needed to store each row */
36 {
37   /* PSEUDO-CODE:
38   1. Choose the appropriate size for the arrays
39   2. Rescue the arrays which would be lost during garbage collection
40   3. Reallocate and correct administration
41   4. Move all arrays so that they are in proper order */
42 
43   PetscInt        i, j;
44   PetscInt        nrows          = result->nrows;
45   PetscInt        n_alloc_ok     = 0;
46   PetscInt        n_alloc_ok_max = 0;
47   PetscInt        need_already   = 0;
48   PetscInt        max_need_extra = 0;
49   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
50   PetscInt        n_alloc_now    = result->n_alloc_icol;
51   PetscInt       *alloc_icol_old = result->alloc_icol;
52   PetscScalar    *alloc_val_old  = result->alloc_val;
53   PetscInt       *icol_rescue;
54   PetscScalar    *val_rescue;
55   PetscInt        n_rescue;
56   PetscInt        i_here, i_last, n_copy;
57   const PetscReal xtra_perc = 20;
58 
59   PetscFunctionBegin;
60   /*********************************************************
61   1. Choose appropriate array size
62   Count number of rows and memory usage which is already final */
63   for (i = 0; i < i_row; i++) {
64     n_alloc_ok += result->row_nnz[i];
65     n_alloc_ok_max += max_row_nnz[i];
66   }
67 
68   /* Count currently needed memory usage and future memory requirements
69     (max, predicted)*/
70   for (i = i_row; i < nrows; i++) {
71     if (!result->row_nnz[i]) {
72       max_need_extra += max_row_nnz[i];
73     } else {
74       need_already += max_row_nnz[i];
75     }
76   }
77 
78   /* Make maximal and realistic memory requirement estimates */
79   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
80   n_alloc_est = n_alloc_ok + need_already + (int)(((PetscReal)max_need_extra) * ((PetscReal)n_alloc_ok) / ((PetscReal)n_alloc_ok_max));
81 
82   /* Choose array sizes */
83   if (n_alloc_max == n_alloc_est) n_alloc = n_alloc_max;
84   else if (n_alloc_now >= n_alloc_est) n_alloc = n_alloc_now;
85   else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) n_alloc = n_alloc_max;
86   else n_alloc = (int)(n_alloc_est * (1 + xtra_perc / 100.0));
87 
88   /* If new estimate is less than what we already have,
89     don't reallocate, just garbage-collect */
90   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) n_alloc = result->n_alloc_icol;
91 
92   /* Motivate dimension choice */
93   PetscCall(PetscInfo(NULL, "   Allocating %" PetscInt_FMT " nonzeros: ", n_alloc)); /* checkbadSource \n */
94   if (n_alloc_max == n_alloc_est) {
95     PetscCall(PetscInfo(NULL, "this is the correct size\n"));
96   } else if (n_alloc_now >= n_alloc_est) {
97     PetscCall(PetscInfo(NULL, "the current size, which seems enough\n"));
98   } else if (n_alloc_max < n_alloc_est * (1 + xtra_perc / 100.0)) {
99     PetscCall(PetscInfo(NULL, "the maximum estimate\n"));
100   } else {
101     PetscCall(PetscInfo(NULL, "%6.2f %% more than the estimate\n", (double)xtra_perc));
102   }
103 
104   /**********************************************************
105   2. Rescue arrays which would be lost
106   Count how many rows and nonzeros will have to be rescued
107   when moving all arrays in place */
108   n_rescue = 0;
109   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
110   else {
111     i = *n_row_alloc_ok - 1;
112 
113     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
114   }
115 
116   for (i = *n_row_alloc_ok; i < nrows; i++) {
117     i_here = result->icols[i] - result->alloc_icol;
118     i_last = i_here + result->row_nnz[i];
119     if (result->row_nnz[i] > 0) {
120       if (*n_alloc_used > i_here || i_last > n_alloc) n_rescue += result->row_nnz[i];
121 
122       if (i < i_row) *n_alloc_used += result->row_nnz[i];
123       else *n_alloc_used += max_row_nnz[i];
124     }
125   }
126 
127   /* Allocate rescue arrays */
128   PetscCall(PetscMalloc1(n_rescue, &icol_rescue));
129   PetscCall(PetscMalloc1(n_rescue, &val_rescue));
130 
131   /* Rescue the arrays which need rescuing */
132   n_rescue = 0;
133   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
134   else {
135     i             = *n_row_alloc_ok - 1;
136     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
137   }
138 
139   for (i = *n_row_alloc_ok; i < nrows; i++) {
140     i_here = result->icols[i] - result->alloc_icol;
141     i_last = i_here + result->row_nnz[i];
142     if (result->row_nnz[i] > 0) {
143       if (*n_alloc_used > i_here || i_last > n_alloc) {
144         PetscCall(PetscArraycpy(&icol_rescue[n_rescue], result->icols[i], result->row_nnz[i]));
145         PetscCall(PetscArraycpy(&val_rescue[n_rescue], result->values[i], result->row_nnz[i]));
146         n_rescue += result->row_nnz[i];
147       }
148 
149       if (i < i_row) *n_alloc_used += result->row_nnz[i];
150       else *n_alloc_used += max_row_nnz[i];
151     }
152   }
153 
154   /**********************************************************
155   3. Reallocate and correct administration */
156 
157   if (n_alloc != result->n_alloc_icol) {
158     n_copy = PetscMin(n_alloc, result->n_alloc_icol);
159 
160     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
161 
162         Allocate new icol-data, copy old contents */
163     PetscCall(PetscMalloc1(n_alloc, &result->alloc_icol));
164     PetscCall(PetscArraycpy(result->alloc_icol, alloc_icol_old, n_copy));
165 
166     /* Update administration, Reset pointers to new arrays  */
167     result->n_alloc_icol = n_alloc;
168     for (i = 0; i < nrows; i++) {
169       result->icols[i]  = result->alloc_icol + (result->icols[i] - alloc_icol_old);
170       result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
171     }
172 
173     /* Delete old array */
174     PetscCall(PetscFree(alloc_icol_old));
175 
176     /* Allocate new value-data, copy old contents */
177     PetscCall(PetscMalloc1(n_alloc, &result->alloc_val));
178     PetscCall(PetscArraycpy(result->alloc_val, alloc_val_old, n_copy));
179 
180     /* Update administration, Reset pointers to new arrays  */
181     result->n_alloc_val = n_alloc;
182     for (i = 0; i < nrows; i++) result->values[i] = result->alloc_val + (result->icols[i] - result->alloc_icol);
183 
184     /* Delete old array */
185     PetscCall(PetscFree(alloc_val_old));
186   }
187 
188   /*********************************************************
189   4. Copy all the arrays to their proper places */
190   n_rescue = 0;
191   if (*n_row_alloc_ok == 0) *n_alloc_used = 0;
192   else {
193     i = *n_row_alloc_ok - 1;
194 
195     *n_alloc_used = (result->icols[i] - result->alloc_icol) + result->row_nnz[i];
196   }
197 
198   for (i = *n_row_alloc_ok; i < nrows; i++) {
199     i_here = result->icols[i] - result->alloc_icol;
200     i_last = i_here + result->row_nnz[i];
201 
202     result->icols[i]  = result->alloc_icol + *n_alloc_used;
203     result->values[i] = result->alloc_val + *n_alloc_used;
204 
205     if (result->row_nnz[i] > 0) {
206       if (*n_alloc_used > i_here || i_last > n_alloc) {
207         PetscCall(PetscArraycpy(result->icols[i], &icol_rescue[n_rescue], result->row_nnz[i]));
208         PetscCall(PetscArraycpy(result->values[i], &val_rescue[n_rescue], result->row_nnz[i]));
209 
210         n_rescue += result->row_nnz[i];
211       } else {
212         for (j = 0; j < result->row_nnz[i]; j++) {
213           result->icols[i][j]  = result->alloc_icol[i_here + j];
214           result->values[i][j] = result->alloc_val[i_here + j];
215         }
216       }
217       if (i < i_row) *n_alloc_used += result->row_nnz[i];
218       else *n_alloc_used += max_row_nnz[i];
219     }
220   }
221 
222   /* Delete the rescue arrays */
223   PetscCall(PetscFree(icol_rescue));
224   PetscCall(PetscFree(val_rescue));
225 
226   *n_row_alloc_ok = i_row;
227   PetscFunctionReturn(PETSC_SUCCESS);
228 }
229 
230 /*
231   spbas_incomplete_cholesky:
232      incomplete Cholesky decomposition of a square, symmetric,
233      positive definite matrix.
234 
235      In case negative diagonals are encountered, function returns
236      NEGATIVE_DIAGONAL. When this happens, call this function again
237      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
238      droptol
239 */
240 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix *matrix_L, PetscBool *success)
241 {
242   PetscInt        jL;
243   Mat_SeqAIJ     *a  = (Mat_SeqAIJ *)A->data;
244   PetscInt       *ai = a->i, *aj = a->j;
245   MatScalar      *aa = a->a;
246   PetscInt        nrows, ncols;
247   PetscInt       *max_row_nnz;
248   spbas_matrix    retval;
249   PetscScalar    *diag;
250   PetscScalar    *val;
251   PetscScalar    *lvec;
252   PetscScalar     epsdiag;
253   PetscInt        i, j, k;
254   const PetscBool do_values = PETSC_TRUE;
255   PetscInt       *r1_icol;
256   PetscScalar    *r1_val;
257   PetscInt       *r_icol;
258   PetscInt        r_nnz;
259   PetscScalar    *r_val;
260   PetscInt       *A_icol;
261   PetscInt        A_nnz;
262   PetscScalar    *A_val;
263   PetscInt       *p_icol;
264   PetscInt        p_nnz;
265   PetscInt        n_row_alloc_ok = 0; /* number of rows which have been stored   correctly in the matrix */
266   PetscInt        n_alloc_used   = 0; /* part of result->icols and result->values   which is currently being used */
267 
268   PetscFunctionBegin;
269   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
270   PetscCall(MatGetSize(A, &nrows, &ncols));
271   PetscCall(MatGetTrace(A, &epsdiag));
272 
273   epsdiag *= epsdiag_in / nrows;
274 
275   PetscCall(PetscInfo(NULL, "   Dimensioned Manteuffel shift %g Drop tolerance %g\n", (double)PetscRealPart(epsdiag), (double)droptol));
276 
277   if (droptol < 1e-10) droptol = 1e-10;
278 
279   retval.nrows        = nrows;
280   retval.ncols        = nrows;
281   retval.nnz          = pattern.nnz / 10;
282   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
283   retval.block_data   = PETSC_TRUE;
284 
285   PetscCall(spbas_allocate_pattern(&retval, do_values));
286   PetscCall(PetscArrayzero(retval.row_nnz, nrows));
287   PetscCall(spbas_allocate_data(&retval));
288   retval.nnz = 0;
289 
290   PetscCall(PetscMalloc1(nrows, &diag));
291   PetscCall(PetscCalloc1(nrows, &val));
292   PetscCall(PetscCalloc1(nrows, &lvec));
293   PetscCall(PetscCalloc1(nrows, &max_row_nnz));
294 
295   /* Count the nonzeros on transpose of pattern */
296   for (i = 0; i < nrows; i++) {
297     p_nnz  = pattern.row_nnz[i];
298     p_icol = pattern.icols[i];
299     for (j = 0; j < p_nnz; j++) max_row_nnz[i + p_icol[j]]++;
300   }
301 
302   /* Calculate rows of L */
303   for (i = 0; i < nrows; i++) {
304     p_nnz  = pattern.row_nnz[i];
305     p_icol = pattern.icols[i];
306 
307     r_nnz  = retval.row_nnz[i];
308     r_icol = retval.icols[i];
309     r_val  = retval.values[i];
310     A_nnz  = ai[rip[i] + 1] - ai[rip[i]];
311     A_icol = &aj[ai[rip[i]]];
312     A_val  = &aa[ai[rip[i]]];
313 
314     /* Calculate  val += A(i,i:n)'; */
315     for (j = 0; j < A_nnz; j++) {
316       k = riip[A_icol[j]];
317       if (k >= i) val[k] = A_val[j];
318     }
319 
320     /*  Add regularization */
321     val[i] += epsdiag;
322 
323     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
324         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
325     for (j = 0; j < r_nnz; j++) {
326       k       = r_icol[j];
327       lvec[k] = diag[k] * r_val[j];
328       val[i] -= r_val[j] * lvec[k];
329     }
330 
331     /* Calculate the new diagonal */
332     diag[i] = val[i];
333     if (PetscRealPart(diag[i]) < droptol) {
334       PetscCall(PetscInfo(NULL, "Error in spbas_incomplete_cholesky:\n"));
335       PetscCall(PetscInfo(NULL, "Negative diagonal in row %" PetscInt_FMT "\n", i + 1));
336 
337       /* Delete the whole matrix at once. */
338       PetscCall(spbas_delete(retval));
339       *success = PETSC_FALSE;
340       PetscFunctionReturn(PETSC_SUCCESS);
341     }
342 
343     /* If necessary, allocate arrays */
344     if (r_nnz == 0) {
345       PetscBool success = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
346       PetscCheck(success, PETSC_COMM_SELF, PETSC_ERR_MEM, "spbas_cholesky_row_alloc() failed");
347       r_icol = retval.icols[i];
348       r_val  = retval.values[i];
349     }
350 
351     /* Now, fill in */
352     r_icol[r_nnz] = i;
353     r_val[r_nnz]  = 1.0;
354     r_nnz++;
355     retval.row_nnz[i]++;
356 
357     retval.nnz += r_nnz;
358 
359     /* Calculate
360         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
361     for (j = 1; j < p_nnz; j++) {
362       k       = i + p_icol[j];
363       r1_icol = retval.icols[k];
364       r1_val  = retval.values[k];
365       for (jL = 0; jL < retval.row_nnz[k]; jL++) val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
366       val[k] /= diag[i];
367 
368       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k]) < -droptol) {
369         /* If necessary, allocate arrays */
370         if (!retval.row_nnz[k]) {
371           PetscBool flag, success = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
372           if (!success) {
373             PetscCall(spbas_cholesky_garbage_collect(&retval, i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
374             flag = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
375             PetscCheck(flag, PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation in spbas_cholesky_row_alloc() failed");
376             r_icol = retval.icols[i];
377           }
378         }
379 
380         retval.icols[k][retval.row_nnz[k]]  = i;
381         retval.values[k][retval.row_nnz[k]] = val[k];
382         retval.row_nnz[k]++;
383       }
384       val[k] = 0;
385     }
386 
387     /* Erase the values used in the work arrays */
388     for (j = 0; j < r_nnz; j++) lvec[r_icol[j]] = 0;
389   }
390 
391   PetscCall(PetscFree(lvec));
392   PetscCall(PetscFree(val));
393 
394   PetscCall(spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz));
395   PetscCall(PetscFree(max_row_nnz));
396 
397   /* Place the inverse of the diagonals in the matrix */
398   for (i = 0; i < nrows; i++) {
399     r_nnz = retval.row_nnz[i];
400 
401     retval.values[i][r_nnz - 1] = 1.0 / diag[i];
402     for (j = 0; j < r_nnz - 1; j++) retval.values[i][j] *= -1;
403   }
404   PetscCall(PetscFree(diag));
405   *matrix_L = retval;
406   *success  = PETSC_TRUE;
407   PetscFunctionReturn(PETSC_SUCCESS);
408 }
409 
410 #endif
411