xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision d54338ec4fdf48b56b5efdd9bf04ca9d50d6ab04)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 #undef __FUNCT__
9 #define __FUNCT__ "spbas_cholesky_row_alloc"
10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11 {
12    PetscFunctionBegin;
13    retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
14    retval.values[k] = &retval.alloc_val[*n_alloc_used];
15    *n_alloc_used   += r_nnz;
16    if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
17    PetscFunctionReturn(0);
18 }
19 
20 
21 /*
22   spbas_cholesky_garbage_collect:
23      move the rows which have been calculated so far, as well as
24      those currently under construction, to the front of the
25      array, while putting them in the proper order.
26      When it seems necessary, enlarge the current arrays.
27 
28      NB: re-allocation is being simulated using
29          PetscMalloc, memcpy, PetscFree, because
30          PetscRealloc does not seem to exist.
31 
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "spbas_cholesky_garbage_collect"
35 PetscErrorCode spbas_cholesky_garbage_collect(
36       spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
37                                    Only the storage, not the contents of this matrix
38                                    is changed in this function */
39       PetscInt     i_row,             /* I  : Number of rows for which the final contents are
40                                      known */
41       PetscInt     *n_row_alloc_ok,   /* I/O: Number of rows which are already in their final
42                                      places in the arrays: they need not be moved any more */
43       PetscInt     *n_alloc_used,     /* I/O:  */
44       PetscInt     *max_row_nnz)      /* I  : Over-estimate of the number of nonzeros needed to
45                                     store each row */
46 {
47 
48 
49   /* PSEUDO-CODE:
50     1. Choose the appropriate size for the arrays
51     2. Rescue the arrays which would be lost during garbage collection
52     3. Reallocate and correct administration
53     4. Move all arrays so that they are in proper order */
54 
55    PetscInt        i,j;
56    PetscInt        nrows = result->nrows;
57    PetscInt        n_alloc_ok=0;
58    PetscInt        n_alloc_ok_max=0;
59    PetscErrorCode  ierr;
60    PetscInt        need_already= 0;
61    PetscInt        n_rows_ahead=0;
62    PetscInt        max_need_extra= 0;
63    PetscInt        n_alloc_max, n_alloc_est, n_alloc;
64    PetscInt        n_alloc_now = result->n_alloc_icol;
65    PetscInt        *alloc_icol_old = result->alloc_icol;
66    PetscScalar     *alloc_val_old  = result->alloc_val;
67    PetscInt        *icol_rescue;
68    PetscScalar     *val_rescue;
69    PetscInt        n_rescue;
70    PetscInt        n_row_rescue;
71    PetscInt        i_here, i_last, n_copy;
72    const PetscReal xtra_perc = 20;
73 
74    PetscFunctionBegin;
75    /*********************************************************
76     1. Choose appropriate array size
77     Count number of rows and memory usage which is already final */
78    for (i=0;i<i_row; i++)  {
79       n_alloc_ok += result->row_nnz[i];
80       n_alloc_ok_max += max_row_nnz[i];
81    }
82 
83    /* Count currently needed memory usage and future memory requirements
84       (max, predicted)*/
85    for (i=i_row; i<nrows; i++) {
86      if (!result->row_nnz[i]) {
87          max_need_extra  += max_row_nnz[i];
88       } else {
89          need_already    += max_row_nnz[i];
90          n_rows_ahead++;
91       }
92    }
93 
94    /* Make maximal and realistic memory requirement estimates */
95    n_alloc_max = n_alloc_ok + need_already + max_need_extra;
96    n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
97 
98    /* Choose array sizes */
99    if (n_alloc_max == n_alloc_est)  { n_alloc = n_alloc_max; }
100    else if (n_alloc_now >= n_alloc_est)  { n_alloc = n_alloc_now; }
101    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))  { n_alloc  = n_alloc_max;  }
102    else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }
103 
104    /* If new estimate is less than what we already have,
105       don't reallocate, just garbage-collect */
106    if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
107       n_alloc = result->n_alloc_icol;
108    }
109 
110    /* Motivate dimension choice */
111    ierr = PetscInfo1(PETSC_NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
112    if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); }
113    else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); }
114    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); }
115    else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); }
116 
117 
118    /**********************************************************
119     2. Rescue arrays which would be lost
120     Count how many rows and nonzeros will have to be rescued
121     when moving all arrays in place */
122    n_row_rescue = 0; n_rescue = 0;
123    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
124    else  {
125       i = *n_row_alloc_ok - 1;
126       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
127    }
128 
129    for (i=*n_row_alloc_ok; i<nrows; i++) {
130       i_here = result->icols[i]-result->alloc_icol;
131       i_last = i_here + result->row_nnz[i];
132       if (result->row_nnz[i]>0) {
133          if (*n_alloc_used > i_here || i_last > n_alloc) {
134             n_rescue += result->row_nnz[i];
135             n_row_rescue++;
136          }
137 
138          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
139          else         {  *n_alloc_used += max_row_nnz[i];}
140       }
141    }
142 
143    /* Allocate rescue arrays */
144    ierr = PetscMalloc(n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr);
145    ierr = PetscMalloc(n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr);
146 
147    /* Rescue the arrays which need rescuing */
148    n_row_rescue = 0; n_rescue = 0;
149    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
150    else {
151       i = *n_row_alloc_ok - 1;
152       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
153    }
154 
155    for (i=*n_row_alloc_ok; i<nrows; i++) {
156       i_here = result->icols[i]-result->alloc_icol;
157       i_last = i_here + result->row_nnz[i];
158       if (result->row_nnz[i]>0) {
159          if (*n_alloc_used > i_here || i_last > n_alloc) {
160             ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
161             ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
162             n_rescue += result->row_nnz[i];
163             n_row_rescue++;
164          }
165 
166          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
167          else         {  *n_alloc_used += max_row_nnz[i];}
168       }
169    }
170 
171    /**********************************************************
172     3. Reallocate and correct administration */
173 
174    if (n_alloc != result->n_alloc_icol)  {
175       n_copy = PetscMin(n_alloc,result->n_alloc_icol);
176 
177       /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
178 
179          Allocate new icol-data, copy old contents */
180       ierr = PetscMalloc(n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr);
181       ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr);
182 
183       /* Update administration, Reset pointers to new arrays  */
184       result->n_alloc_icol = n_alloc;
185       for (i=0; i<nrows; i++) {
186           result->icols[i] =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
187           result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
188       }
189 
190       /* Delete old array */
191       ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
192 
193       /* Allocate new value-data, copy old contents */
194       ierr = PetscMalloc(n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
195       ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr);
196 
197       /* Update administration, Reset pointers to new arrays  */
198       result->n_alloc_val  = n_alloc;
199       for (i=0; i<nrows; i++) {
200           result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
201       }
202 
203       /* Delete old array */
204       ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
205    }
206 
207 
208    /*********************************************************
209     4. Copy all the arrays to their proper places */
210    n_row_rescue = 0; n_rescue = 0;
211    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
212    else {
213       i = *n_row_alloc_ok - 1;
214       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
215    }
216 
217    for (i=*n_row_alloc_ok; i<nrows; i++) {
218       i_here = result->icols[i]-result->alloc_icol;
219       i_last = i_here + result->row_nnz[i];
220 
221       result->icols[i] = result->alloc_icol + *n_alloc_used;
222       result->values[i]= result->alloc_val  + *n_alloc_used;
223 
224       if (result->row_nnz[i]>0) {
225          if (*n_alloc_used > i_here || i_last > n_alloc)  {
226             ierr = PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
227             ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
228             n_rescue += result->row_nnz[i];
229             n_row_rescue++;
230          } else {
231             for (j=0; j<result->row_nnz[i]; j++) {
232                 result->icols[i][j]   = result->alloc_icol[i_here+j];
233                 result->values[i][j]  = result->alloc_val[ i_here+j];
234             }
235          }
236          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
237          else         {  *n_alloc_used += max_row_nnz[i];}
238       }
239    }
240 
241    /* Delete the rescue arrays */
242    ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
243    ierr = PetscFree(val_rescue);CHKERRQ(ierr);
244    *n_row_alloc_ok = i_row;
245    PetscFunctionReturn(0);
246 }
247 
248 /*
249   spbas_incomplete_cholesky:
250      incomplete Cholesky decomposition of a square, symmetric,
251      positive definite matrix.
252 
253      In case negative diagonals are encountered, function returns
254      NEGATIVE_DIAGONAL. When this happens, call this function again
255      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
256      droptol
257 */
258 #undef __FUNCT__
259 #define __FUNCT__ "spbas_incomplete_cholesky"
260 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
261 {
262    PetscInt         jL;
263    Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data;
264    PetscInt         *ai=a->i,*aj=a->j;
265    MatScalar        *aa=a->a;
266    PetscInt         nrows, ncols;
267    PetscInt         *max_row_nnz;
268    PetscErrorCode   ierr;
269    spbas_matrix     retval;
270    PetscScalar      * diag;
271    PetscScalar      * val;
272    PetscScalar      * lvec;
273    PetscScalar      epsdiag;
274    PetscInt         i,j,k;
275    const PetscBool  do_values = PETSC_TRUE;
276    PetscInt         * r1_icol;
277    PetscScalar      *r1_val;
278    PetscInt         * r_icol;
279    PetscInt         r_nnz;
280    PetscScalar      *r_val;
281    PetscInt         * A_icol;
282    PetscInt         A_nnz;
283    PetscScalar      *A_val;
284    PetscInt         * p_icol;
285    PetscInt         p_nnz;
286    PetscInt         n_row_alloc_ok = 0;  /* number of rows which have been stored   correctly in the matrix */
287    PetscInt         n_alloc_used = 0;    /* part of result->icols and result->values   which is currently being used */
288 
289    PetscFunctionBegin;
290    /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
291    ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
292    ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
293    epsdiag *= epsdiag_in / nrows;
294    ierr = PetscInfo2(PETSC_NULL,"   Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr);
295 
296    if (droptol<1e-10) {droptol=1e-10;}
297 
298    if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
299 
300    retval.nrows = nrows;
301    retval.ncols = nrows;
302    retval.nnz   = pattern.nnz/10;
303    retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
304    retval.block_data = PETSC_TRUE;
305 
306    ierr =  spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
307    ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
308    ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
309    retval.nnz   = 0;
310 
311    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr);
312    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr);
313    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr);
314    ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr);
315    if (!(diag && val && lvec && max_row_nnz))  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n");
316 
317    ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
318    ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
319    ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
320 
321    /* Check correct format of sparseness pattern */
322    if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS)  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
323 
324    /* Count the nonzeros on transpose of pattern */
325    for (i = 0; i<nrows; i++)  {
326       p_nnz  = pattern.row_nnz[i];
327       p_icol = pattern.icols[i];
328       for (j=0; j<p_nnz; j++)  {
329          max_row_nnz[i+p_icol[j]]++;
330       }
331    }
332 
333    /* Calculate rows of L */
334    for (i = 0; i<nrows; i++)  {
335       p_nnz  = pattern.row_nnz[i];
336       p_icol = pattern.icols[i];
337 
338       r_nnz  = retval.row_nnz[i];
339       r_icol = retval.icols[i];
340       r_val  = retval.values[i];
341       A_nnz  = ai[rip[i]+1] - ai[rip[i]];
342       A_icol = &aj[ai[rip[i]]];
343       A_val  = &aa[ai[rip[i]]];
344 
345       /* Calculate  val += A(i,i:n)'; */
346       for (j=0; j<A_nnz; j++) {
347          k = riip[A_icol[j]];
348          if (k>=i) { val[k] = A_val[j]; }
349       }
350 
351       /*  Add regularization */
352       val[i] += epsdiag;
353 
354       /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
355          val(i) = A(i,i) - L(0:i-1,i)' * lvec */
356       for (j=0; j<r_nnz; j++)  {
357          k           = r_icol[j];
358          lvec[k]     = diag[k] * r_val[j];
359          val[i]     -= r_val[j] * lvec[k];
360       }
361 
362       /* Calculate the new diagonal */
363       diag[i] = val[i];
364       if (PetscRealPart(diag[i])<droptol)  {
365          ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n");
366          ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1);
367 
368          /* Delete the whole matrix at once. */
369          ierr = spbas_delete(retval);
370          return NEGATIVE_DIAGONAL;
371       }
372 
373       /* If necessary, allocate arrays */
374       if (r_nnz==0) {
375          ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr);
376          if (ierr == PETSC_ERR_MEM) {
377             ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
378             ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
379          }
380          r_icol = retval.icols[i];
381          r_val  = retval.values[i];
382       }
383 
384       /* Now, fill in */
385       r_icol[r_nnz] = i;
386       r_val [r_nnz] = 1.0;
387       r_nnz++;
388       retval.row_nnz[i]++;
389 
390       retval.nnz += r_nnz;
391 
392       /* Calculate
393          val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
394       for (j=1; j<p_nnz; j++)  {
395          k = i+p_icol[j];
396          r1_icol = retval.icols[k];
397          r1_val  = retval.values[k];
398          for (jL=0; jL<retval.row_nnz[k]; jL++)   {
399             val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
400          }
401          val[k] /= diag[i];
402 
403          if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
404            /* If necessary, allocate arrays */
405             if (retval.row_nnz[k]==0) {
406                ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
407                if (ierr == PETSC_ERR_MEM) {
408                   ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
409                   ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr);
410                   r_icol = retval.icols[i];
411                   r_val  = retval.values[i];
412                }
413             }
414 
415             retval.icols[k][retval.row_nnz[k]] = i;
416             retval.values[k][retval.row_nnz[k]] = val[k];
417             retval.row_nnz[k]++;
418          }
419          val[k] = 0;
420       }
421 
422       /* Erase the values used in the work arrays */
423       for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
424    }
425 
426    ierr=PetscFree(lvec);CHKERRQ(ierr);
427    ierr=PetscFree(val);CHKERRQ(ierr);
428 
429    ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
430    ierr=PetscFree(max_row_nnz);CHKERRQ(ierr);
431 
432    /* Place the inverse of the diagonals in the matrix */
433    for (i=0; i<nrows; i++) {
434       r_nnz = retval.row_nnz[i];
435       retval.values[i][r_nnz-1] = 1.0 / diag[i];
436       for (j=0; j<r_nnz-1; j++) {
437          retval.values[i][j] *= -1;
438       }
439    }
440    ierr=PetscFree(diag);CHKERRQ(ierr);
441    *matrix_L = retval;
442    PetscFunctionReturn(0);
443 }
444 
445