xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 1147fc2a6bf7922defbbca15fc6c33f234803ff0)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 #undef __FUNCT__
9 #define __FUNCT__ "spbas_cholesky_row_alloc"
10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11 {
12    PetscFunctionBegin;
13    retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
14    retval.values[k] = &retval.alloc_val[*n_alloc_used];
15    *n_alloc_used   += r_nnz;
16    if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
17    PetscFunctionReturn(0);
18 }
19 
20 
21 /*
22   spbas_cholesky_garbage_collect:
23      move the rows which have been calculated so far, as well as
24      those currently under construction, to the front of the
25      array, while putting them in the proper order.
26      When it seems necessary, enlarge the current arrays.
27 
28      NB: re-allocation is being simulated using
29          PetscMalloc, memcpy, PetscFree, because
30          PetscRealloc does not seem to exist.
31 
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "spbas_cholesky_garbage_collect"
35 PetscErrorCode spbas_cholesky_garbage_collect(
36       spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
37                                    Only the storage, not the contents of this matrix
38                                    is changed in this function */
39       PetscInt     i_row,             /* I  : Number of rows for which the final contents are
40                                      known */
41       PetscInt     *n_row_alloc_ok,   /* I/O: Number of rows which are already in their final
42                                      places in the arrays: they need not be moved any more */
43       PetscInt     *n_alloc_used,     /* I/O:  */
44       PetscInt     *max_row_nnz)      /* I  : Over-estimate of the number of nonzeros needed to
45                                     store each row */
46 {
47 
48 
49   /* PSEUDO-CODE:
50     1. Choose the appropriate size for the arrays
51     2. Rescue the arrays which would be lost during garbage collection
52     3. Reallocate and correct administration
53     4. Move all arrays so that they are in proper order */
54 
55    PetscInt        i,j;
56    PetscInt        nrows = result->nrows;
57    PetscInt        n_alloc_ok=0;
58    PetscInt        n_alloc_ok_max=0;
59    PetscErrorCode  ierr;
60    PetscInt        need_already= 0;
61    PetscInt        n_rows_ahead=0;
62    PetscInt        max_need_extra= 0;
63    PetscInt        n_alloc_max, n_alloc_est, n_alloc;
64    PetscInt        n_alloc_now = result->n_alloc_icol;
65    PetscInt        *alloc_icol_old = result->alloc_icol;
66    PetscScalar     *alloc_val_old  = result->alloc_val;
67    PetscInt        *icol_rescue;
68    PetscScalar     *val_rescue;
69    PetscInt        n_rescue;
70    PetscInt        n_row_rescue;
71    PetscInt        i_here, i_last, n_copy;
72    const PetscReal xtra_perc = 20;
73 
74    PetscFunctionBegin;
75 
76    /*********************************************************
77     1. Choose appropriate array size
78     Count number of rows and memory usage which is already final */
79    for (i=0;i<i_row; i++)  {
80       n_alloc_ok += result->row_nnz[i];
81       n_alloc_ok_max += max_row_nnz[i];
82    }
83 
84    /* Count currently needed memory usage and future memory requirements
85       (max, predicted)*/
86    for (i=i_row; i<nrows; i++) {
87      if (!result->row_nnz[i]) {
88          max_need_extra  += max_row_nnz[i];
89       } else {
90          need_already    += max_row_nnz[i];
91          n_rows_ahead++;
92       }
93    }
94 
95    /* Make maximal and realistic memory requirement estimates */
96    n_alloc_max = n_alloc_ok + need_already + max_need_extra;
97    n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
98 
99    /* Choose array sizes */
100    if (n_alloc_max == n_alloc_est)  { n_alloc = n_alloc_max; }
101    else if (n_alloc_now >= n_alloc_est)  { n_alloc = n_alloc_now; }
102    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))  { n_alloc  = n_alloc_max;  }
103    else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }
104 
105    /* If new estimate is less than what we already have,
106       don't reallocate, just garbage-collect */
107    if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
108       n_alloc = result->n_alloc_icol;
109    }
110 
111    /* Motivate dimension choice */
112    ierr = PetscInfo1(PETSC_NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
113    if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); }
114    else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); }
115    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); }
116    else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); }
117 
118 
119    /**********************************************************
120     2. Rescue arrays which would be lost
121     Count how many rows and nonzeros will have to be rescued
122     when moving all arrays in place */
123    n_row_rescue = 0; n_rescue = 0;
124    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
125    else  {
126       i = *n_row_alloc_ok - 1;
127       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
128    }
129 
130    for (i=*n_row_alloc_ok; i<nrows; i++) {
131       i_here = result->icols[i]-result->alloc_icol;
132       i_last = i_here + result->row_nnz[i];
133       if (result->row_nnz[i]>0) {
134          if (*n_alloc_used > i_here || i_last > n_alloc) {
135             n_rescue += result->row_nnz[i];
136             n_row_rescue++;
137          }
138 
139          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
140          else         {  *n_alloc_used += max_row_nnz[i];}
141       }
142    }
143 
144    /* Allocate rescue arrays */
145    ierr = PetscMalloc(n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr);
146    ierr = PetscMalloc(n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr);
147 
148    /* Rescue the arrays which need rescuing */
149    n_row_rescue = 0; n_rescue = 0;
150    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
151    else {
152       i = *n_row_alloc_ok - 1;
153       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
154    }
155 
156    for (i=*n_row_alloc_ok; i<nrows; i++) {
157       i_here = result->icols[i]-result->alloc_icol;
158       i_last = i_here + result->row_nnz[i];
159       if (result->row_nnz[i]>0) {
160          if (*n_alloc_used > i_here || i_last > n_alloc) {
161             ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
162             ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
163             n_rescue += result->row_nnz[i];
164             n_row_rescue++;
165          }
166 
167          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
168          else         {  *n_alloc_used += max_row_nnz[i];}
169       }
170    }
171 
172    /**********************************************************
173     3. Reallocate and correct administration */
174 
175    if (n_alloc != result->n_alloc_icol)  {
176       n_copy = PetscMin(n_alloc,result->n_alloc_icol);
177 
178       /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
179 
180          Allocate new icol-data, copy old contents */
181       ierr = PetscMalloc(n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr);
182       ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr);
183 
184       /* Update administration, Reset pointers to new arrays  */
185       result->n_alloc_icol = n_alloc;
186       for (i=0; i<nrows; i++) {
187           result->icols[i] =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
188           result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
189       }
190 
191       /* Delete old array */
192       ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
193 
194       /* Allocate new value-data, copy old contents */
195       ierr = PetscMalloc(n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
196       ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr);
197 
198       /* Update administration, Reset pointers to new arrays  */
199       result->n_alloc_val  = n_alloc;
200       for (i=0; i<nrows; i++) {
201           result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
202       }
203 
204       /* Delete old array */
205       ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
206    }
207 
208 
209    /*********************************************************
210     4. Copy all the arrays to their proper places */
211    n_row_rescue = 0; n_rescue = 0;
212    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
213    else {
214       i = *n_row_alloc_ok - 1;
215       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
216    }
217 
218    for (i=*n_row_alloc_ok; i<nrows; i++) {
219       i_here = result->icols[i]-result->alloc_icol;
220       i_last = i_here + result->row_nnz[i];
221 
222       result->icols[i] = result->alloc_icol + *n_alloc_used;
223       result->values[i]= result->alloc_val  + *n_alloc_used;
224 
225       if (result->row_nnz[i]>0) {
226          if (*n_alloc_used > i_here || i_last > n_alloc)  {
227             ierr = PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
228             ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
229             n_rescue += result->row_nnz[i];
230             n_row_rescue++;
231          } else {
232             for (j=0; j<result->row_nnz[i]; j++) {
233                 result->icols[i][j]   = result->alloc_icol[i_here+j];
234                 result->values[i][j]  = result->alloc_val[ i_here+j];
235             }
236          }
237          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
238          else         {  *n_alloc_used += max_row_nnz[i];}
239       }
240    }
241 
242    /* Delete the rescue arrays */
243    ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
244    ierr = PetscFree(val_rescue);CHKERRQ(ierr);
245    *n_row_alloc_ok = i_row;
246    PetscFunctionReturn(0);
247 }
248 
249 /*
250   spbas_incomplete_cholesky:
251      incomplete Cholesky decomposition of a square, symmetric,
252      positive definite matrix.
253 
254      In case negative diagonals are encountered, function returns
255      NEGATIVE_DIAGONAL. When this happens, call this function again
256      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
257      droptol
258 */
259 #undef __FUNCT__
260 #define __FUNCT__ "spbas_incomplete_cholesky"
261 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
262 {
263    PetscInt         jL;
264    Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data;
265    PetscInt         *ai=a->i,*aj=a->j;
266    MatScalar        *aa=a->a;
267    PetscInt         nrows, ncols;
268    PetscInt         *max_row_nnz;
269    PetscErrorCode   ierr;
270    spbas_matrix     retval;
271    PetscScalar      * diag;
272    PetscScalar      * val;
273    PetscScalar      * lvec;
274    PetscScalar      epsdiag;
275    PetscInt         i,j,k;
276    const PetscBool  do_values = PETSC_TRUE;
277    PetscInt         * r1_icol;
278    PetscScalar      *r1_val;
279    PetscInt         * r_icol;
280    PetscInt         r_nnz;
281    PetscScalar      *r_val;
282    PetscInt         * A_icol;
283    PetscInt         A_nnz;
284    PetscScalar      *A_val;
285    PetscInt         * p_icol;
286    PetscInt         p_nnz;
287    PetscInt         n_row_alloc_ok = 0;  /* number of rows which have been stored   correctly in the matrix */
288    PetscInt         n_alloc_used = 0;    /* part of result->icols and result->values   which is currently being used */
289 
290    PetscFunctionBegin;
291    /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
292    ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
293    ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
294    epsdiag *= epsdiag_in / nrows;
295    ierr = PetscInfo2(PETSC_NULL,"   Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr);
296 
297    if (droptol<1e-10) {droptol=1e-10;}
298 
299    if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
300 
301    retval.nrows = nrows;
302    retval.ncols = nrows;
303    retval.nnz   = pattern.nnz/10;
304    retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
305    retval.block_data = PETSC_TRUE;
306 
307    ierr =  spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
308    ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
309    ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
310    retval.nnz   = 0;
311 
312    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr);
313    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr);
314    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr);
315    ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr);
316    if (!(diag && val && lvec && max_row_nnz))  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n");
317 
318    ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
319    ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
320    ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
321 
322    /* Check correct format of sparseness pattern */
323    if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS)  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
324 
325    /* Count the nonzeros on transpose of pattern */
326    for (i = 0; i<nrows; i++)  {
327       p_nnz  = pattern.row_nnz[i];
328       p_icol = pattern.icols[i];
329       for (j=0; j<p_nnz; j++)  {
330          max_row_nnz[i+p_icol[j]]++;
331       }
332    }
333 
334    /* Calculate rows of L */
335    for (i = 0; i<nrows; i++)  {
336       p_nnz  = pattern.row_nnz[i];
337       p_icol = pattern.icols[i];
338 
339       r_nnz  = retval.row_nnz[i];
340       r_icol = retval.icols[i];
341       r_val  = retval.values[i];
342       A_nnz  = ai[rip[i]+1] - ai[rip[i]];
343       A_icol = &aj[ai[rip[i]]];
344       A_val  = &aa[ai[rip[i]]];
345 
346       /* Calculate  val += A(i,i:n)'; */
347       for (j=0; j<A_nnz; j++) {
348          k = riip[A_icol[j]];
349          if (k>=i) { val[k] = A_val[j]; }
350       }
351 
352       /*  Add regularization */
353       val[i] += epsdiag;
354 
355       /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
356          val(i) = A(i,i) - L(0:i-1,i)' * lvec */
357       for (j=0; j<r_nnz; j++)  {
358          k           = r_icol[j];
359          lvec[k]     = diag[k] * r_val[j];
360          val[i]     -= r_val[j] * lvec[k];
361       }
362 
363       /* Calculate the new diagonal */
364       diag[i] = val[i];
365       if (PetscRealPart(diag[i])<droptol)  {
366          ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n");
367          ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1);
368 
369          /* Delete the whole matrix at once. */
370          ierr = spbas_delete(retval);
371          return NEGATIVE_DIAGONAL;
372       }
373 
374       /* If necessary, allocate arrays */
375       if (r_nnz==0) {
376          ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr);
377          if (ierr == PETSC_ERR_MEM) {
378             ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
379             ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
380          }
381          r_icol = retval.icols[i];
382          r_val  = retval.values[i];
383       }
384 
385       /* Now, fill in */
386       r_icol[r_nnz] = i;
387       r_val [r_nnz] = 1.0;
388       r_nnz++;
389       retval.row_nnz[i]++;
390 
391       retval.nnz += r_nnz;
392 
393       /* Calculate
394          val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
395       for (j=1; j<p_nnz; j++)  {
396          k = i+p_icol[j];
397          r1_icol = retval.icols[k];
398          r1_val  = retval.values[k];
399          for (jL=0; jL<retval.row_nnz[k]; jL++)   {
400             val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
401          }
402          val[k] /= diag[i];
403 
404          if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
405            /* If necessary, allocate arrays */
406             if (retval.row_nnz[k]==0) {
407                ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
408                if (ierr == PETSC_ERR_MEM) {
409                   ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
410                   ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr);
411                   r_icol = retval.icols[i];
412                   r_val  = retval.values[i];
413                }
414             }
415 
416             retval.icols[k][retval.row_nnz[k]] = i;
417             retval.values[k][retval.row_nnz[k]] = val[k];
418             retval.row_nnz[k]++;
419          }
420          val[k] = 0;
421       }
422 
423       /* Erase the values used in the work arrays */
424       for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
425    }
426 
427    ierr=PetscFree(lvec);CHKERRQ(ierr);
428    ierr=PetscFree(val);CHKERRQ(ierr);
429 
430    ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
431    ierr=PetscFree(max_row_nnz);CHKERRQ(ierr);
432 
433    /* Place the inverse of the diagonals in the matrix */
434    for (i=0; i<nrows; i++) {
435       r_nnz = retval.row_nnz[i];
436       retval.values[i][r_nnz-1] = 1.0 / diag[i];
437       for (j=0; j<r_nnz-1; j++) {
438          retval.values[i][j] *= -1;
439       }
440    }
441    ierr=PetscFree(diag);CHKERRQ(ierr);
442    *matrix_L = retval;
443    PetscFunctionReturn(0);
444 }
445 
446