xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 71d55d18a6cc105fecd21255dfee54cec910a2af)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 #undef __FUNCT__
9 #define __FUNCT__ "spbas_cholesky_row_alloc"
10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11 {
12    PetscFunctionBegin;
13    retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
14    retval.values[k] = &retval.alloc_val[*n_alloc_used];
15    *n_alloc_used   += r_nnz;
16    if (*n_alloc_used > retval.n_alloc_icol)
17    {
18       PetscFunctionReturn(PETSC_ERR_MEM);
19    }
20    else
21    {
22       PetscFunctionReturn(0);
23    }
24 }
25 
26 
27 /*
28   spbas_cholesky_garbage_collect:
29      move the rows which have been calculated so far, as well as
30      those currently under construction, to the front of the
31      array, while putting them in the proper order.
32      When it seems necessary, enlarge the current arrays.
33 
34      NB: re-allocation is being simulated using
35          PetscMalloc, memcpy, PetscFree, because
36          PetscRealloc does not seem to exist.
37 
38 */
39 #undef __FUNCT__
40 #define __FUNCT__ "spbas_cholesky_garbage_collect"
41 PetscErrorCode spbas_cholesky_garbage_collect(
42       spbas_matrix * result, /* I/O: the Cholesky factor matrix being constructed.
43                                    Only the storage, not the contents of this matrix
44                                    is changed in this function */
45       PetscInt i_row,             /* I  : Number of rows for which the final contents are
46 				     known */
47       PetscInt *n_row_alloc_ok,   /* I/O: Number of rows which are already in their final
48 				     places in the arrays: they need not be moved any more */
49       PetscInt *n_alloc_used,     /* I/O:  */
50       PetscInt *max_row_nnz       /*I  : Over-estimate of the number of nonzeros needed to
51 				    store each row */
52      )
53 {
54 
55 
56   /* PSEUDO-CODE:
57     1. Choose the appropriate size for the arrays
58     2. Rescue the arrays which would be lost during garbage collection
59     3. Reallocate and correct administration
60     4. Move all arrays so that they are in proper order */
61 
62    PetscInt i,j;
63    PetscInt nrows = result->nrows;
64    PetscInt n_alloc_ok=0;
65    PetscInt n_alloc_ok_max=0;
66 
67    PetscErrorCode ierr;
68    PetscInt need_already= 0;
69    PetscInt n_rows_ahead=0;
70    PetscInt max_need_extra= 0;
71    PetscInt n_alloc_max, n_alloc_est, n_alloc;
72    PetscInt n_alloc_now = result->n_alloc_icol;
73    PetscInt *alloc_icol_old = result->alloc_icol;
74    PetscScalar *alloc_val_old  = result->alloc_val;
75    PetscInt *icol_rescue;
76    PetscScalar *val_rescue;
77    PetscInt n_rescue;
78    PetscInt n_row_rescue;
79    PetscInt i_here, i_last, n_copy;
80    const PetscReal xtra_perc = 20;
81 
82    PetscFunctionBegin;
83 
84    /*********************************************************
85     1. Choose appropriate array size
86     Count number of rows and memory usage which is already final */
87    for (i=0;i<i_row; i++)
88    {
89       n_alloc_ok += result->row_nnz[i];
90       n_alloc_ok_max += max_row_nnz[i];
91    }
92 
93    /* Count currently needed memory usage and future memory requirements
94       (max, predicted)*/
95    for (i=i_row; i<nrows; i++)
96    {
97       if (result->row_nnz[i]==0)
98       {
99          max_need_extra  += max_row_nnz[i];
100       }
101       else
102       {
103          need_already    += max_row_nnz[i];
104          n_rows_ahead++;
105       }
106    }
107 
108    /* Make maximal and realistic memory requirement estimates */
109    n_alloc_max = n_alloc_ok + need_already + max_need_extra;
110    n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *
111 						    ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
112 
113    /* Choose array sizes */
114    if (n_alloc_max == n_alloc_est)
115       { n_alloc = n_alloc_max; }
116    else if (n_alloc_now >= n_alloc_est)
117       { n_alloc = n_alloc_now; }
118    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))
119       { n_alloc  = n_alloc_max;  }
120    else
121       { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }
122 
123    /* If new estimate is less than what we already have,
124       don't reallocate, just garbage-collect */
125    if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol)
126    {
127       n_alloc = result->n_alloc_icol;
128    }
129 
130    /* Motivate dimension choice */
131    ierr = PetscInfo1(PETSC_NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
132    if (n_alloc_max == n_alloc_est)
133       { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); }
134    else if (n_alloc_now >= n_alloc_est)
135       { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); }
136    else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))
137       { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); }
138    else
139       { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); }
140 
141 
142    /**********************************************************
143     2. Rescue arrays which would be lost
144     Count how many rows and nonzeros will have to be rescued
145     when moving all arrays in place */
146    n_row_rescue = 0; n_rescue = 0;
147    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
148    else
149    {
150       i = *n_row_alloc_ok - 1;
151       *n_alloc_used = (result->icols[i]-result->alloc_icol) +
152                        result->row_nnz[i];
153    }
154 
155    for (i=*n_row_alloc_ok; i<nrows; i++)
156    {
157       i_here = result->icols[i]-result->alloc_icol;
158       i_last = i_here + result->row_nnz[i];
159       if (result->row_nnz[i]>0)
160       {
161          if (*n_alloc_used > i_here || i_last > n_alloc)
162          {
163             n_rescue += result->row_nnz[i];
164             n_row_rescue++;
165          }
166 
167          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
168          else         {  *n_alloc_used += max_row_nnz[i];}
169       }
170    }
171 
172    /* Allocate rescue arrays */
173    ierr = PetscMalloc( n_rescue * sizeof(int), &icol_rescue);
174    CHKERRQ(ierr);
175    ierr = PetscMalloc( n_rescue * sizeof(PetscScalar), &val_rescue);
176    CHKERRQ(ierr);
177 
178    /* Rescue the arrays which need rescuing */
179    n_row_rescue = 0; n_rescue = 0;
180    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
181    else
182    {
183       i = *n_row_alloc_ok - 1;
184       *n_alloc_used = (result->icols[i]-result->alloc_icol) +
185                        result->row_nnz[i];
186    }
187 
188    for (i=*n_row_alloc_ok; i<nrows; i++)
189    {
190       i_here = result->icols[i]-result->alloc_icol;
191       i_last = i_here + result->row_nnz[i];
192       if (result->row_nnz[i]>0)
193       {
194          if (*n_alloc_used > i_here || i_last > n_alloc)
195          {
196             memcpy((void*) &icol_rescue[n_rescue],
197                    (void*) result->icols[i],
198                     result->row_nnz[i] * sizeof(int));
199             memcpy((void*) &val_rescue[n_rescue],
200                    (void*) result->values[i],
201                    result->row_nnz[i] * sizeof(PetscScalar));
202             n_rescue += result->row_nnz[i];
203             n_row_rescue++;
204          }
205 
206          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
207          else         {  *n_alloc_used += max_row_nnz[i];}
208       }
209    }
210 
211    /**********************************************************
212     3. Reallocate and correct administration */
213 
214    if (n_alloc != result->n_alloc_icol)
215    {
216       n_copy = MIN(n_alloc,result->n_alloc_icol);
217 
218       /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
219 
220 	 Allocate new icol-data, copy old contents */
221       ierr = PetscMalloc(  n_alloc * sizeof(int),
222                            &result->alloc_icol); CHKERRQ(ierr);
223       memcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));
224 
225       /* Update administration, Reset pointers to new arrays  */
226       result->n_alloc_icol = n_alloc;
227       for (i=0; i<nrows; i++)
228       {
229           result->icols[i] =  result->alloc_icol +
230                   (result->icols[i]  - alloc_icol_old);
231           result->values[i] =  result->alloc_val +
232                   (result->icols[i]  - result->alloc_icol);
233       }
234 
235       /* Delete old array */
236       ierr = PetscFree(alloc_icol_old);  CHKERRQ(ierr);
237 
238       /* Allocate new value-data, copy old contents */
239       ierr = PetscMalloc(  n_alloc * sizeof(PetscScalar),
240                            &result->alloc_val); CHKERRQ(ierr);
241       memcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));
242 
243       /* Update administration, Reset pointers to new arrays  */
244       result->n_alloc_val  = n_alloc;
245       for (i=0; i<nrows; i++)
246       {
247           result->values[i] =  result->alloc_val +
248                   (result->icols[i]  - result->alloc_icol);
249       }
250 
251       /* Delete old array */
252       ierr = PetscFree(alloc_val_old);  CHKERRQ(ierr);
253    }
254 
255 
256    /*********************************************************
257     4. Copy all the arrays to their proper places */
258    n_row_rescue = 0; n_rescue = 0;
259    if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
260    else
261    {
262       i = *n_row_alloc_ok - 1;
263       *n_alloc_used = (result->icols[i]-result->alloc_icol) +
264                        result->row_nnz[i];
265    }
266 
267    for (i=*n_row_alloc_ok; i<nrows; i++)
268    {
269       i_here = result->icols[i]-result->alloc_icol;
270       i_last = i_here + result->row_nnz[i];
271 
272       result->icols[i] = result->alloc_icol + *n_alloc_used;
273       result->values[i]= result->alloc_val  + *n_alloc_used;
274 
275       if (result->row_nnz[i]>0)
276       {
277          if (*n_alloc_used > i_here || i_last > n_alloc)
278          {
279             memcpy((void*) result->icols[i],
280                    (void*) &icol_rescue[n_rescue],
281                     result->row_nnz[i] * sizeof(int));
282             memcpy((void*) result->values[i],
283                    (void*) &val_rescue[n_rescue],
284                     result->row_nnz[i] * sizeof(PetscScalar));
285             n_rescue += result->row_nnz[i];
286             n_row_rescue++;
287          }
288          else
289          {
290             for (j=0; j<result->row_nnz[i]; j++)
291             {
292                 result->icols[i][j]   = result->alloc_icol[i_here+j];
293                 result->values[i][j]  = result->alloc_val[ i_here+j];
294             }
295          }
296          if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
297          else         {  *n_alloc_used += max_row_nnz[i];}
298       }
299    }
300 
301    /* Delete the rescue arrays */
302    ierr = PetscFree(icol_rescue); CHKERRQ(ierr);
303    ierr = PetscFree(val_rescue);  CHKERRQ(ierr);
304    *n_row_alloc_ok = i_row;
305 
306    PetscFunctionReturn(0);
307 
308 }
309 
310 /*
311   spbas_incomplete_cholesky:
312      incomplete Cholesky decomposition of a square, symmetric,
313      positive definite matrix.
314 
315      In case negative diagonals are encountered, function returns
316      NEGATIVE_DIAGONAL. When this happens, call this function again
317      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
318      droptol
319 */
320 #undef __FUNCT__
321 #define __FUNCT__ "spbas_incomplete_cholesky"
322 PetscErrorCode spbas_incomplete_cholesky(
323         Mat A, const PetscInt *rip, const PetscInt *riip,
324         spbas_matrix pattern,
325         PetscReal droptol, PetscReal epsdiag_in,
326         spbas_matrix * matrix_L)
327 {
328    PetscInt jL;
329    Mat_SeqAIJ     *a=(Mat_SeqAIJ*)A->data;
330    PetscInt       *ai=a->i,*aj=a->j;
331    MatScalar      *aa=a->a;
332    PetscInt nrows, ncols;
333    PetscInt *max_row_nnz;
334    PetscErrorCode ierr;
335    spbas_matrix retval;
336    PetscScalar * diag;
337    PetscScalar * val;
338    PetscScalar * lvec;
339    PetscScalar epsdiag;
340    PetscInt i,j,k;
341    const PetscTruth do_values = PETSC_TRUE;
342    PetscInt * r1_icol; PetscScalar *r1_val;
343    PetscInt * r_icol; PetscInt r_nnz; PetscScalar *r_val;
344    PetscInt * A_icol; PetscInt A_nnz; PetscScalar *A_val;
345    PetscInt * p_icol; PetscInt p_nnz;
346    PetscInt n_row_alloc_ok = 0;  /* number of rows which have been stored
347 				    correctly in the matrix */
348    PetscInt n_alloc_used = 0;    /* part of result->icols and result->values
349 				    which is currently being used */
350    PetscFunctionBegin;
351 
352    /* Convert the Manteuffel shift from 'fraction of average diagonal' to
353       dimensioned value */
354    ierr = MatGetSize(A, &nrows, &ncols); CHKERRQ(ierr);
355    ierr = MatGetTrace(A, &epsdiag); CHKERRQ(ierr);
356    epsdiag *= epsdiag_in / nrows;
357    ierr = PetscInfo2(PETSC_NULL,"   Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr);
358 
359    if (droptol<1e-10) {droptol=1e-10;}
360 
361    if ( (nrows != pattern.nrows) ||
362         (ncols != pattern.ncols) ||
363         (ncols != nrows) )
364    {
365       SETERRQ( PETSC_ERR_ARG_INCOMP,
366                "Dimension error in spbas_incomplete_cholesky\n");
367    }
368 
369    retval.nrows = nrows;
370    retval.ncols = nrows;
371    retval.nnz   = pattern.nnz/10;
372    retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
373    retval.block_data = PETSC_TRUE;
374 
375    ierr =  spbas_allocate_pattern(&retval, do_values); CHKERRQ(ierr);
376    memset((void*) retval.row_nnz, 0, nrows*sizeof(int));
377    ierr =  spbas_allocate_data(&retval); CHKERRQ(ierr);
378    retval.nnz   = 0;
379 
380    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag); CHKERRQ(ierr);
381    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);  CHKERRQ(ierr);
382    ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec); CHKERRQ(ierr);
383    ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);  CHKERRQ(ierr);
384    if (!(diag && val && lvec && max_row_nnz))
385    {
386       SETERRQ( PETSC_ERR_MEM,
387               "Allocation error in spbas_incomplete_cholesky\n");
388    }
389 
390    memset((void*) val, 0, nrows*sizeof(PetscScalar));
391    memset((void*) lvec, 0, nrows*sizeof(PetscScalar));
392    memset((void*) max_row_nnz, 0, nrows*sizeof(int));
393 
394    /* Check correct format of sparseness pattern */
395    if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS)
396    {
397       SETERRQ( PETSC_ERR_SUP_SYS,
398                "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
399    }
400 
401    /* Count the nonzeros on transpose of pattern */
402    for (i = 0; i<nrows; i++)
403    {
404       p_nnz  = pattern.row_nnz[i];
405       p_icol = pattern.icols[i];
406       for (j=0; j<p_nnz; j++)
407       {
408          max_row_nnz[i+p_icol[j]]++;
409       }
410    }
411 
412    /* Calculate rows of L */
413    for (i = 0; i<nrows; i++)
414    {
415       p_nnz  = pattern.row_nnz[i];
416       p_icol = pattern.icols[i];
417 
418       r_nnz  = retval.row_nnz[i];
419       r_icol = retval.icols[i];
420       r_val  = retval.values[i];
421       A_nnz  = ai[rip[i]+1] - ai[rip[i]];
422       A_icol = &aj[ai[rip[i]]];
423       A_val  = &aa[ai[rip[i]]];
424 
425       /* Calculate  val += A(i,i:n)'; */
426       for (j=0; j<A_nnz; j++)
427       {
428          k = riip[A_icol[j]];
429          if (k>=i) { val[k] = A_val[j]; }
430       }
431 
432       /*  Add regularization */
433       val[i] += epsdiag;
434 
435       /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
436 	 val(i) = A(i,i) - L(0:i-1,i)' * lvec */
437       for ( j=0; j<r_nnz; j++)
438       {
439          k           = r_icol[j];
440          lvec[k]     = diag[k] * r_val[j];
441          val[i]     -= r_val[j] * lvec[k];
442       }
443 
444       /* Calculate the new diagonal */
445       diag[i] = val[i];
446       if (PetscRealPart(diag[i])<droptol)
447       {
448          ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n");
449          ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1);
450 
451          /* Delete the whole matrix at once. */
452          ierr = spbas_delete(retval);
453          return NEGATIVE_DIAGONAL;
454       }
455 
456       /* If necessary, allocate arrays */
457       if (r_nnz==0)
458       {
459          ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
460          if (ierr == PETSC_ERR_MEM)
461          {
462             ierr = spbas_cholesky_garbage_collect( &retval,  i, &n_row_alloc_ok,
463                                &n_alloc_used, max_row_nnz);
464             CHKERRQ(ierr);
465             ierr = spbas_cholesky_row_alloc( retval, i, 1, &n_alloc_used);
466          }
467          CHKERRQ(ierr);
468          r_icol = retval.icols[i];
469          r_val  = retval.values[i];
470       }
471 
472       /* Now, fill in */
473       r_icol[r_nnz] = i;
474       r_val [r_nnz] = 1.0;
475       r_nnz++;
476       retval.row_nnz[i]++;
477 
478       retval.nnz += r_nnz;
479 
480       /* Calculate
481          val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
482       for (j=1; j<p_nnz; j++)
483       {
484          k = i+p_icol[j];
485          r1_icol = retval.icols[k];
486          r1_val  = retval.values[k];
487          for (jL=0; jL<retval.row_nnz[k]; jL++)
488          {
489             val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
490          }
491          val[k] /= diag[i];
492 
493          if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol)
494          {
495 	   /* If necessary, allocate arrays */
496             if (retval.row_nnz[k]==0)
497             {
498                ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k],
499                             &n_alloc_used);
500                if (ierr == PETSC_ERR_MEM)
501                {
502                   ierr = spbas_cholesky_garbage_collect(
503                                      &retval,  i, &n_row_alloc_ok,
504                                      &n_alloc_used, max_row_nnz);
505                   CHKERRQ(ierr);
506                   ierr = spbas_cholesky_row_alloc( retval, k, max_row_nnz[k],
507                             &n_alloc_used);
508                   r_icol = retval.icols[i];
509                   r_val  = retval.values[i];
510                }
511                CHKERRQ(ierr);
512             }
513 
514             retval.icols[k][retval.row_nnz[k]] = i;
515             retval.values[k][retval.row_nnz[k]] = val[k];
516             retval.row_nnz[k]++;
517          }
518          val[k] = 0;
519       }
520 
521       /*Erase the values used in the work arrays */
522       for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
523    }
524 
525    ierr=PetscFree(lvec); CHKERRQ(ierr); ierr=PetscFree(val); CHKERRQ(ierr);
526 
527    ierr = spbas_cholesky_garbage_collect( &retval, nrows, &n_row_alloc_ok,
528                                      &n_alloc_used, max_row_nnz);
529    ierr=PetscFree(max_row_nnz); CHKERRQ(ierr);
530 
531    /* Place the inverse of the diagonals in the matrix */
532    for (i=0; i<nrows; i++)
533    {
534       r_nnz = retval.row_nnz[i];
535       retval.values[i][r_nnz-1] = 1.0 / diag[i];
536       for (j=0; j<r_nnz-1; j++)
537       {
538          retval.values[i][j] *= -1;
539       }
540    }
541    ierr=PetscFree(diag); CHKERRQ(ierr);
542 
543 
544    *matrix_L = retval;
545 
546    PetscFunctionReturn(0);
547 }
548 
549