xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 9ccc4a9b96fc3663f3953ac586c9c547e88efdff)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 #undef __FUNCT__
9 #define __FUNCT__ "spbas_cholesky_row_alloc"
10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11 {
12    PetscFunctionBegin;
13    retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
14    retval.values[k] = &retval.alloc_val[*n_alloc_used];
15    *n_alloc_used   += r_nnz;
16    if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
17    PetscFunctionReturn(0);
18 }
19 
20 
21 /*
22   spbas_cholesky_garbage_collect:
23      move the rows which have been calculated so far, as well as
24      those currently under construction, to the front of the
25      array, while putting them in the proper order.
26      When it seems necessary, enlarge the current arrays.
27 
28      NB: re-allocation is being simulated using
29          PetscMalloc, memcpy, PetscFree, because
30          PetscRealloc does not seem to exist.
31 
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "spbas_cholesky_garbage_collect"
35 PetscErrorCode spbas_cholesky_garbage_collect(
36       spbas_matrix *result, /* I/O: the Cholesky factor matrix being constructed.
37                                    Only the storage, not the contents of this matrix
38                                    is changed in this function */
39       PetscInt     i_row,             /* I  : Number of rows for which the final contents are
40 				     known */
41       PetscInt     *n_row_alloc_ok,   /* I/O: Number of rows which are already in their final
42 				     places in the arrays: they need not be moved any more */
43       PetscInt     *n_alloc_used,     /* I/O:  */
44       PetscInt     *max_row_nnz       /*I  : Over-estimate of the number of nonzeros needed to
45 				    store each row */
46      )
47 {
48 
49 
50   /* PSEUDO-CODE:
51     1. Choose the appropriate size for the arrays
52     2. Rescue the arrays which would be lost during garbage collection
53     3. Reallocate and correct administration
54     4. Move all arrays so that they are in proper order */
55 
56    PetscInt        i,j;
57    PetscInt        nrows = result->nrows;
58    PetscInt        n_alloc_ok=0;
59    PetscInt        n_alloc_ok_max=0;
60    PetscErrorCode  ierr;
61    PetscInt        need_already= 0;
62    PetscInt        n_rows_ahead=0;
63    PetscInt        max_need_extra= 0;
64    PetscInt        n_alloc_max, n_alloc_est, n_alloc;
65    PetscInt        n_alloc_now = result->n_alloc_icol;
66    PetscInt        *alloc_icol_old = result->alloc_icol;
67    PetscScalar     *alloc_val_old  = result->alloc_val;
68    PetscInt        *icol_rescue;
69    PetscScalar     *val_rescue;
70    PetscInt        n_rescue;
71    PetscInt        n_row_rescue;
72    PetscInt        i_here, i_last, n_copy;
73    const PetscReal xtra_perc = 20;
74 
75    PetscFunctionBegin;
76 
77    /*********************************************************
78     1. Choose appropriate array size
79     Count number of rows and memory usage which is already final */
80    for (i=0;i<i_row; i++)  {
81       n_alloc_ok += result->row_nnz[i];
82       n_alloc_ok_max += max_row_nnz[i];
83    }
84 
85    /* Count currently needed memory usage and future memory requirements
86       (max, predicted)*/
87    for (i=i_row; i<nrows; i++) {
88      if (!result->row_nnz[i]) {
89          max_need_extra  += max_row_nnz[i];
90       } else {
91          need_already    += max_row_nnz[i];
92          n_rows_ahead++;
93       }
94    }
95 
96    /* Make maximal and realistic memory requirement estimates */
97    n_alloc_max = n_alloc_ok + need_already + max_need_extra;
98    n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
99 
100    /* Choose array sizes */
101    if (n_alloc_max == n_alloc_est)  { n_alloc = n_alloc_max; }
102    else if (n_alloc_now >= n_alloc_est)  { n_alloc = n_alloc_now; }
103    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))  { n_alloc  = n_alloc_max;  }
104    else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }
105 
106    /* If new estimate is less than what we already have,
107       don't reallocate, just garbage-collect */
108    if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
109       n_alloc = result->n_alloc_icol;
110    }
111 
112    /* Motivate dimension choice */
113    ierr = PetscInfo1(PETSC_NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
114    if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); }
115    else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); }
116    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); }
117    else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); }
118 
119 
120    /**********************************************************
121     2. Rescue arrays which would be lost
122     Count how many rows and nonzeros will have to be rescued
123     when moving all arrays in place */
124    n_row_rescue = 0; n_rescue = 0;
125    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
126    else  {
127       i = *n_row_alloc_ok - 1;
128       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
129    }
130 
131    for (i=*n_row_alloc_ok; i<nrows; i++) {
132       i_here = result->icols[i]-result->alloc_icol;
133       i_last = i_here + result->row_nnz[i];
134       if (result->row_nnz[i]>0) {
135          if (*n_alloc_used > i_here || i_last > n_alloc) {
136             n_rescue += result->row_nnz[i];
137             n_row_rescue++;
138          }
139 
140          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
141          else         {  *n_alloc_used += max_row_nnz[i];}
142       }
143    }
144 
145    /* Allocate rescue arrays */
146    ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr);
147    ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr);
148 
149    /* Rescue the arrays which need rescuing */
150    n_row_rescue = 0; n_rescue = 0;
151    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
152    else {
153       i = *n_row_alloc_ok - 1;
154       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
155    }
156 
157    for (i=*n_row_alloc_ok; i<nrows; i++) {
158       i_here = result->icols[i]-result->alloc_icol;
159       i_last = i_here + result->row_nnz[i];
160       if (result->row_nnz[i]>0) {
161          if (*n_alloc_used > i_here || i_last > n_alloc)
162          {
163             ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
164             ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
165             n_rescue += result->row_nnz[i];
166             n_row_rescue++;
167          }
168 
169          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
170          else         {  *n_alloc_used += max_row_nnz[i];}
171       }
172    }
173 
174    /**********************************************************
175     3. Reallocate and correct administration */
176 
177    if (n_alloc != result->n_alloc_icol)  {
178       n_copy = MIN(n_alloc,result->n_alloc_icol);
179 
180       /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
181 
182 	 Allocate new icol-data, copy old contents */
183       ierr = PetscMalloc(  n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr);
184       ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr);
185 
186       /* Update administration, Reset pointers to new arrays  */
187       result->n_alloc_icol = n_alloc;
188       for (i=0; i<nrows; i++) {
189           result->icols[i] =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
190           result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
191       }
192 
193       /* Delete old array */
194       ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
195 
196       /* Allocate new value-data, copy old contents */
197       ierr = PetscMalloc(  n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
198       ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr);
199 
200       /* Update administration, Reset pointers to new arrays  */
201       result->n_alloc_val  = n_alloc;
202       for (i=0; i<nrows; i++) {
203           result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
204       }
205 
206       /* Delete old array */
207       ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
208    }
209 
210 
211    /*********************************************************
212     4. Copy all the arrays to their proper places */
213    n_row_rescue = 0; n_rescue = 0;
214    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
215    else {
216       i = *n_row_alloc_ok - 1;
217       *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
218    }
219 
220    for (i=*n_row_alloc_ok; i<nrows; i++) {
221       i_here = result->icols[i]-result->alloc_icol;
222       i_last = i_here + result->row_nnz[i];
223 
224       result->icols[i] = result->alloc_icol + *n_alloc_used;
225       result->values[i]= result->alloc_val  + *n_alloc_used;
226 
227       if (result->row_nnz[i]>0) {
228          if (*n_alloc_used > i_here || i_last > n_alloc)  {
229             ierr = PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
230             ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
231             n_rescue += result->row_nnz[i];
232             n_row_rescue++;
233          } else {
234             for (j=0; j<result->row_nnz[i]; j++) {
235                 result->icols[i][j]   = result->alloc_icol[i_here+j];
236                 result->values[i][j]  = result->alloc_val[ i_here+j];
237             }
238          }
239          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
240          else         {  *n_alloc_used += max_row_nnz[i];}
241       }
242    }
243 
244    /* Delete the rescue arrays */
245    ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
246    ierr = PetscFree(val_rescue);CHKERRQ(ierr);
247    *n_row_alloc_ok = i_row;
248    PetscFunctionReturn(0);
249 }
250 
251 /*
252   spbas_incomplete_cholesky:
253      incomplete Cholesky decomposition of a square, symmetric,
254      positive definite matrix.
255 
256      In case negative diagonals are encountered, function returns
257      NEGATIVE_DIAGONAL. When this happens, call this function again
258      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
259      droptol
260 */
261 #undef __FUNCT__
262 #define __FUNCT__ "spbas_incomplete_cholesky"
263 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
264 {
265    PetscInt         jL;
266    Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data;
267    PetscInt         *ai=a->i,*aj=a->j;
268    MatScalar        *aa=a->a;
269    PetscInt         nrows, ncols;
270    PetscInt         *max_row_nnz;
271    PetscErrorCode   ierr;
272    spbas_matrix     retval;
273    PetscScalar      * diag;
274    PetscScalar      * val;
275    PetscScalar      * lvec;
276    PetscScalar      epsdiag;
277    PetscInt         i,j,k;
278    const PetscTruth do_values = PETSC_TRUE;
279    PetscInt         * r1_icol;
280    PetscScalar      *r1_val;
281    PetscInt         * r_icol;
282    PetscInt         r_nnz;
283    PetscScalar      *r_val;
284    PetscInt         * A_icol;
285    PetscInt         A_nnz;
286    PetscScalar      *A_val;
287    PetscInt         * p_icol;
288    PetscInt         p_nnz;
289    PetscInt         n_row_alloc_ok = 0;  /* number of rows which have been stored   correctly in the matrix */
290    PetscInt         n_alloc_used = 0;    /* part of result->icols and result->values   which is currently being used */
291 
292    PetscFunctionBegin;
293    /* Convert the Manteuffel shift from 'fraction of average diagonal' to
294       dimensioned value */
295    ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
296    ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
297    epsdiag *= epsdiag_in / nrows;
298    ierr = PetscInfo2(PETSC_NULL,"   Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr);
299 
300    if (droptol<1e-10) {droptol=1e-10;}
301 
302    if ( (nrows != pattern.nrows) ||
303         (ncols != pattern.ncols) ||
304         (ncols != nrows) )
305    {
306       SETERRQ( PETSC_ERR_ARG_INCOMP,
307                "Dimension error in spbas_incomplete_cholesky\n");
308    }
309 
310    retval.nrows = nrows;
311    retval.ncols = nrows;
312    retval.nnz   = pattern.nnz/10;
313    retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
314    retval.block_data = PETSC_TRUE;
315 
316    ierr =  spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
317    memset((void*) retval.row_nnz, 0, nrows*sizeof(int));
318    ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
319    retval.nnz   = 0;
320 
321    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr);
322    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr);
323    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr);
324    ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr);
325    if (!(diag && val && lvec && max_row_nnz))
326    {
327       SETERRQ( PETSC_ERR_MEM,
328               "Allocation error in spbas_incomplete_cholesky\n");
329    }
330 
331    memset((void*) val, 0, nrows*sizeof(PetscScalar));
332    memset((void*) lvec, 0, nrows*sizeof(PetscScalar));
333    memset((void*) max_row_nnz, 0, nrows*sizeof(int));
334 
335    /* Check correct format of sparseness pattern */
336    if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS)
337    {
338       SETERRQ( PETSC_ERR_SUP_SYS,
339                "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
340    }
341 
342    /* Count the nonzeros on transpose of pattern */
343    for (i = 0; i<nrows; i++)
344    {
345       p_nnz  = pattern.row_nnz[i];
346       p_icol = pattern.icols[i];
347       for (j=0; j<p_nnz; j++)
348       {
349          max_row_nnz[i+p_icol[j]]++;
350       }
351    }
352 
353    /* Calculate rows of L */
354    for (i = 0; i<nrows; i++)
355    {
356       p_nnz  = pattern.row_nnz[i];
357       p_icol = pattern.icols[i];
358 
359       r_nnz  = retval.row_nnz[i];
360       r_icol = retval.icols[i];
361       r_val  = retval.values[i];
362       A_nnz  = ai[rip[i]+1] - ai[rip[i]];
363       A_icol = &aj[ai[rip[i]]];
364       A_val  = &aa[ai[rip[i]]];
365 
366       /* Calculate  val += A(i,i:n)'; */
367       for (j=0; j<A_nnz; j++)
368       {
369          k = riip[A_icol[j]];
370          if (k>=i) { val[k] = A_val[j]; }
371       }
372 
373       /*  Add regularization */
374       val[i] += epsdiag;
375 
376       /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
377 	 val(i) = A(i,i) - L(0:i-1,i)' * lvec */
378       for ( j=0; j<r_nnz; j++)
379       {
380          k           = r_icol[j];
381          lvec[k]     = diag[k] * r_val[j];
382          val[i]     -= r_val[j] * lvec[k];
383       }
384 
385       /* Calculate the new diagonal */
386       diag[i] = val[i];
387       if (PetscRealPart(diag[i])<droptol)
388       {
389          ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n");
390          ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1);
391 
392          /* Delete the whole matrix at once. */
393          ierr = spbas_delete(retval);
394          return NEGATIVE_DIAGONAL;
395       }
396 
397       /* If necessary, allocate arrays */
398       if (r_nnz==0)
399       {
400          ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);CHKERRQ(ierr);
401          if (ierr == PETSC_ERR_MEM)
402          {
403             ierr = spbas_cholesky_garbage_collect( &retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
404             ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
405          }
406          r_icol = retval.icols[i];
407          r_val  = retval.values[i];
408       }
409 
410       /* Now, fill in */
411       r_icol[r_nnz] = i;
412       r_val [r_nnz] = 1.0;
413       r_nnz++;
414       retval.row_nnz[i]++;
415 
416       retval.nnz += r_nnz;
417 
418       /* Calculate
419          val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
420       for (j=1; j<p_nnz; j++)
421       {
422          k = i+p_icol[j];
423          r1_icol = retval.icols[k];
424          r1_val  = retval.values[k];
425          for (jL=0; jL<retval.row_nnz[k]; jL++)
426          {
427             val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
428          }
429          val[k] /= diag[i];
430 
431          if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol)
432          {
433 	   /* If necessary, allocate arrays */
434             if (retval.row_nnz[k]==0)
435             {
436                ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used);
437                if (ierr == PETSC_ERR_MEM) {
438                   ierr = spbas_cholesky_garbage_collect( &retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
439                   ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr);
440                   r_icol = retval.icols[i];
441                   r_val  = retval.values[i];
442                }
443             }
444 
445             retval.icols[k][retval.row_nnz[k]] = i;
446             retval.values[k][retval.row_nnz[k]] = val[k];
447             retval.row_nnz[k]++;
448          }
449          val[k] = 0;
450       }
451 
452       /*Erase the values used in the work arrays */
453       for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
454    }
455 
456    ierr=PetscFree(lvec);CHKERRQ(ierr);
457    ierr=PetscFree(val);CHKERRQ(ierr);
458 
459    ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
460    ierr=PetscFree(max_row_nnz);CHKERRQ(ierr);
461 
462    /* Place the inverse of the diagonals in the matrix */
463    for (i=0; i<nrows; i++) {
464       r_nnz = retval.row_nnz[i];
465       retval.values[i][r_nnz-1] = 1.0 / diag[i];
466       for (j=0; j<r_nnz-1; j++) {
467          retval.values[i][j] *= -1;
468       }
469    }
470    ierr=PetscFree(diag);CHKERRQ(ierr);
471    *matrix_L = retval;
472    PetscFunctionReturn(0);
473 }
474 
475