xref: /petsc/src/mat/impls/aij/seq/bas/spbas_cholesky.h (revision 009bbdc485cd9ad46be9940d3549e2dde9cdc322)
1 
2 /*
3    spbas_cholesky_row_alloc:
4       in the data arrays, find a place where another row may be stored.
5       Return PETSC_ERR_MEM if there is insufficient space to store the
6       row, so garbage collection and/or re-allocation may be done.
7 */
8 #undef __FUNCT__
9 #define __FUNCT__ "spbas_cholesky_row_alloc"
10 PetscErrorCode spbas_cholesky_row_alloc(spbas_matrix retval, PetscInt k, PetscInt r_nnz,PetscInt * n_alloc_used)
11 {
12   PetscFunctionBegin;
13   retval.icols[k]  = &retval.alloc_icol[*n_alloc_used];
14   retval.values[k] = &retval.alloc_val[*n_alloc_used];
15   *n_alloc_used   += r_nnz;
16   if (*n_alloc_used > retval.n_alloc_icol) PetscFunctionReturn(PETSC_ERR_MEM);
17   PetscFunctionReturn(0);
18 }
19 
20 
21 /*
22   spbas_cholesky_garbage_collect:
23      move the rows which have been calculated so far, as well as
24      those currently under construction, to the front of the
25      array, while putting them in the proper order.
26      When it seems necessary, enlarge the current arrays.
27 
28      NB: re-allocation is being simulated using
29          PetscMalloc, memcpy, PetscFree, because
30          PetscRealloc does not seem to exist.
31 
32 */
33 #undef __FUNCT__
34 #define __FUNCT__ "spbas_cholesky_garbage_collect"
35 PetscErrorCode spbas_cholesky_garbage_collect(spbas_matrix *result,         /* I/O: the Cholesky factor matrix being constructed.
36                                                                                     Only the storage, not the contents of this matrix
37                                                                                     is changed in this function */
38                                               PetscInt     i_row,           /* I  : Number of rows for which the final contents are known */
39                                               PetscInt     *n_row_alloc_ok, /* I/O: Number of rows which are already in their final
40                                                                                     places in the arrays: they need not be moved any more */
41                                               PetscInt     *n_alloc_used,   /* I/O:  */
42                                               PetscInt     *max_row_nnz)    /* I  : Over-estimate of the number of nonzeros needed to store each row */
43 {
44 /* PSEUDO-CODE:
45   1. Choose the appropriate size for the arrays
46   2. Rescue the arrays which would be lost during garbage collection
47   3. Reallocate and correct administration
48   4. Move all arrays so that they are in proper order */
49 
50   PetscInt        i,j;
51   PetscInt        nrows = result->nrows;
52   PetscInt        n_alloc_ok=0;
53   PetscInt        n_alloc_ok_max=0;
54   PetscErrorCode  ierr;
55   PetscInt        need_already= 0;
56   PetscInt        n_rows_ahead=0;
57   PetscInt        max_need_extra= 0;
58   PetscInt        n_alloc_max, n_alloc_est, n_alloc;
59   PetscInt        n_alloc_now = result->n_alloc_icol;
60   PetscInt        *alloc_icol_old = result->alloc_icol;
61   PetscScalar     *alloc_val_old  = result->alloc_val;
62   PetscInt        *icol_rescue;
63   PetscScalar     *val_rescue;
64   PetscInt        n_rescue;
65   PetscInt        n_row_rescue;
66   PetscInt        i_here, i_last, n_copy;
67   const PetscReal xtra_perc = 20;
68 
69   PetscFunctionBegin;
70   /*********************************************************
71   1. Choose appropriate array size
72   Count number of rows and memory usage which is already final */
73   for (i=0;i<i_row; i++)  {
74     n_alloc_ok += result->row_nnz[i];
75     n_alloc_ok_max += max_row_nnz[i];
76   }
77 
78   /* Count currently needed memory usage and future memory requirements
79     (max, predicted)*/
80   for (i=i_row; i<nrows; i++) {
81     if (!result->row_nnz[i]) {
82       max_need_extra  += max_row_nnz[i];
83     } else {
84       need_already    += max_row_nnz[i];
85       n_rows_ahead++;
86     }
87   }
88 
89   /* Make maximal and realistic memory requirement estimates */
90   n_alloc_max = n_alloc_ok + need_already + max_need_extra;
91   n_alloc_est = n_alloc_ok + need_already + (int) (((PetscReal) max_need_extra) *  ((PetscReal) n_alloc_ok) /((PetscReal) n_alloc_ok_max));
92 
93   /* Choose array sizes */
94   if (n_alloc_max == n_alloc_est)  { n_alloc = n_alloc_max; }
95   else if (n_alloc_now >= n_alloc_est)  { n_alloc = n_alloc_now; }
96   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0))  { n_alloc  = n_alloc_max;  }
97   else { n_alloc = (int) (n_alloc_est * (1+xtra_perc/100.0)); }
98 
99   /* If new estimate is less than what we already have,
100     don't reallocate, just garbage-collect */
101   if (n_alloc_max != n_alloc_est && n_alloc < result->n_alloc_icol) {
102     n_alloc = result->n_alloc_icol;
103   }
104 
105   /* Motivate dimension choice */
106   ierr = PetscInfo1(PETSC_NULL,"   Allocating %d nonzeros: ",n_alloc);CHKERRQ(ierr);
107   if (n_alloc_max == n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"this is the correct size\n");CHKERRQ(ierr); }
108   else if (n_alloc_now >= n_alloc_est) { ierr = PetscInfo(PETSC_NULL,"the current size, which seems enough\n");CHKERRQ(ierr); }
109   else if (n_alloc_max < n_alloc_est * (1+xtra_perc/100.0)) { ierr = PetscInfo(PETSC_NULL,"the maximum estimate\n");CHKERRQ(ierr); }
110   else { ierr = PetscInfo1(PETSC_NULL,"%6.2f %% more than the estimate\n",xtra_perc);CHKERRQ(ierr); }
111 
112 
113   /**********************************************************
114   2. Rescue arrays which would be lost
115   Count how many rows and nonzeros will have to be rescued
116   when moving all arrays in place */
117   n_row_rescue = 0; n_rescue = 0;
118   if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
119   else  {
120     i = *n_row_alloc_ok - 1;
121     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
122   }
123 
124   for (i=*n_row_alloc_ok; i<nrows; i++) {
125     i_here = result->icols[i]-result->alloc_icol;
126     i_last = i_here + result->row_nnz[i];
127     if (result->row_nnz[i]>0) {
128       if (*n_alloc_used > i_here || i_last > n_alloc) {
129         n_rescue += result->row_nnz[i];
130         n_row_rescue++;
131       }
132 
133       if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
134       else         {  *n_alloc_used += max_row_nnz[i];}
135     }
136   }
137 
138   /* Allocate rescue arrays */
139   ierr = PetscMalloc(n_rescue * sizeof(int), &icol_rescue);CHKERRQ(ierr);
140   ierr = PetscMalloc(n_rescue * sizeof(PetscScalar), &val_rescue);CHKERRQ(ierr);
141 
142   /* Rescue the arrays which need rescuing */
143   n_row_rescue = 0; n_rescue = 0;
144   if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
145   else {
146     i = *n_row_alloc_ok - 1;
147     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
148   }
149 
150   for (i=*n_row_alloc_ok; i<nrows; i++) {
151     i_here = result->icols[i]-result->alloc_icol;
152     i_last = i_here + result->row_nnz[i];
153     if (result->row_nnz[i]>0) {
154       if (*n_alloc_used > i_here || i_last > n_alloc) {
155         ierr = PetscMemcpy((void*) &icol_rescue[n_rescue], (void*) result->icols[i], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
156         ierr = PetscMemcpy((void*) &val_rescue[n_rescue], (void*) result->values[i], result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
157         n_rescue += result->row_nnz[i];
158         n_row_rescue++;
159       }
160 
161       if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
162       else         {  *n_alloc_used += max_row_nnz[i];}
163     }
164   }
165 
166   /**********************************************************
167   3. Reallocate and correct administration */
168 
169   if (n_alloc != result->n_alloc_icol)  {
170     n_copy = PetscMin(n_alloc,result->n_alloc_icol);
171 
172     /* PETSC knows no REALLOC, so we'll REALLOC ourselves.
173 
174         Allocate new icol-data, copy old contents */
175     ierr = PetscMalloc(n_alloc * sizeof(int), &result->alloc_icol);CHKERRQ(ierr);
176     ierr = PetscMemcpy(result->alloc_icol, alloc_icol_old, n_copy*sizeof(int));CHKERRQ(ierr);
177 
178     /* Update administration, Reset pointers to new arrays  */
179     result->n_alloc_icol = n_alloc;
180     for (i=0; i<nrows; i++) {
181       result->icols[i] =  result->alloc_icol + (result->icols[i]  - alloc_icol_old);
182       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
183     }
184 
185     /* Delete old array */
186     ierr = PetscFree(alloc_icol_old);CHKERRQ(ierr);
187 
188     /* Allocate new value-data, copy old contents */
189     ierr = PetscMalloc(n_alloc * sizeof(PetscScalar), &result->alloc_val);CHKERRQ(ierr);
190     ierr = PetscMemcpy(result->alloc_val, alloc_val_old, n_copy*sizeof(PetscScalar));CHKERRQ(ierr);
191 
192     /* Update administration, Reset pointers to new arrays  */
193     result->n_alloc_val  = n_alloc;
194     for (i=0; i<nrows; i++) {
195       result->values[i] =  result->alloc_val + (result->icols[i]  - result->alloc_icol);
196     }
197 
198     /* Delete old array */
199     ierr = PetscFree(alloc_val_old);CHKERRQ(ierr);
200   }
201 
202 
203   /*********************************************************
204   4. Copy all the arrays to their proper places */
205   n_row_rescue = 0; n_rescue = 0;
206   if (*n_row_alloc_ok==0) { *n_alloc_used = 0; }
207   else {
208     i = *n_row_alloc_ok - 1;
209     *n_alloc_used = (result->icols[i]-result->alloc_icol) +  result->row_nnz[i];
210   }
211 
212   for (i=*n_row_alloc_ok; i<nrows; i++) {
213     i_here = result->icols[i]-result->alloc_icol;
214     i_last = i_here + result->row_nnz[i];
215 
216     result->icols[i] = result->alloc_icol + *n_alloc_used;
217     result->values[i]= result->alloc_val  + *n_alloc_used;
218 
219     if (result->row_nnz[i]>0) {
220       if (*n_alloc_used > i_here || i_last > n_alloc)  {
221         ierr = PetscMemcpy((void*) result->icols[i],  (void*) &icol_rescue[n_rescue], result->row_nnz[i] * sizeof(int));CHKERRQ(ierr);
222         ierr = PetscMemcpy((void*) result->values[i], (void*) &val_rescue[n_rescue],result->row_nnz[i] * sizeof(PetscScalar));CHKERRQ(ierr);
223         n_rescue += result->row_nnz[i];
224         n_row_rescue++;
225       } else {
226         for (j=0; j<result->row_nnz[i]; j++) {
227             result->icols[i][j]   = result->alloc_icol[i_here+j];
228             result->values[i][j]  = result->alloc_val[ i_here+j];
229         }
230       }
231       if (i<i_row) {  *n_alloc_used += result->row_nnz[i];}
232       else         {  *n_alloc_used += max_row_nnz[i];}
233     }
234   }
235 
236   /* Delete the rescue arrays */
237   ierr = PetscFree(icol_rescue);CHKERRQ(ierr);
238   ierr = PetscFree(val_rescue);CHKERRQ(ierr);
239   *n_row_alloc_ok = i_row;
240   PetscFunctionReturn(0);
241 }
242 
243 /*
244   spbas_incomplete_cholesky:
245      incomplete Cholesky decomposition of a square, symmetric,
246      positive definite matrix.
247 
248      In case negative diagonals are encountered, function returns
249      NEGATIVE_DIAGONAL. When this happens, call this function again
250      with a larger epsdiag_in, a less sparse pattern, and/or a smaller
251      droptol
252 */
253 #undef __FUNCT__
254 #define __FUNCT__ "spbas_incomplete_cholesky"
255 PetscErrorCode spbas_incomplete_cholesky(Mat A, const PetscInt *rip, const PetscInt *riip, spbas_matrix pattern, PetscReal droptol, PetscReal epsdiag_in, spbas_matrix * matrix_L)
256 {
257   PetscInt         jL;
258   Mat_SeqAIJ       *a=(Mat_SeqAIJ*)A->data;
259   PetscInt         *ai=a->i,*aj=a->j;
260   MatScalar        *aa=a->a;
261   PetscInt         nrows, ncols;
262   PetscInt         *max_row_nnz;
263   PetscErrorCode   ierr;
264   spbas_matrix     retval;
265   PetscScalar      * diag;
266   PetscScalar      * val;
267   PetscScalar      * lvec;
268   PetscScalar      epsdiag;
269   PetscInt         i,j,k;
270   const PetscBool  do_values = PETSC_TRUE;
271   PetscInt         * r1_icol;
272   PetscScalar      *r1_val;
273   PetscInt         * r_icol;
274   PetscInt         r_nnz;
275   PetscScalar      *r_val;
276   PetscInt         * A_icol;
277   PetscInt         A_nnz;
278   PetscScalar      *A_val;
279   PetscInt         * p_icol;
280   PetscInt         p_nnz;
281   PetscInt         n_row_alloc_ok = 0;  /* number of rows which have been stored   correctly in the matrix */
282   PetscInt         n_alloc_used = 0;    /* part of result->icols and result->values   which is currently being used */
283 
284   PetscFunctionBegin;
285   /* Convert the Manteuffel shift from 'fraction of average diagonal' to   dimensioned value */
286   ierr = MatGetSize(A, &nrows, &ncols);CHKERRQ(ierr);
287   ierr = MatGetTrace(A, &epsdiag);CHKERRQ(ierr);
288   epsdiag *= epsdiag_in / nrows;
289   ierr = PetscInfo2(PETSC_NULL,"   Dimensioned Manteuffel shift %G Drop tolerance %G\n", PetscRealPart(epsdiag),droptol);CHKERRQ(ierr);
290 
291   if (droptol<1e-10) {droptol=1e-10;}
292 
293   if ((nrows != pattern.nrows) || (ncols != pattern.ncols) || (ncols != nrows)) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_ARG_INCOMP,"Dimension error in spbas_incomplete_cholesky\n");
294 
295   retval.nrows = nrows;
296   retval.ncols = nrows;
297   retval.nnz   = pattern.nnz/10;
298   retval.col_idx_type = SPBAS_COLUMN_NUMBERS;
299   retval.block_data = PETSC_TRUE;
300 
301   ierr =  spbas_allocate_pattern(&retval, do_values);CHKERRQ(ierr);
302   ierr = PetscMemzero((void*) retval.row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
303   ierr =  spbas_allocate_data(&retval);CHKERRQ(ierr);
304   retval.nnz   = 0;
305 
306   ierr = PetscMalloc(nrows*sizeof(PetscScalar), &diag);CHKERRQ(ierr);
307   ierr = PetscMalloc(nrows*sizeof(PetscScalar), &val);CHKERRQ(ierr);
308   ierr = PetscMalloc(nrows*sizeof(PetscScalar), &lvec);CHKERRQ(ierr);
309   ierr = PetscMalloc(nrows*sizeof(int), &max_row_nnz);CHKERRQ(ierr);
310   if (!(diag && val && lvec && max_row_nnz))  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Allocation error in spbas_incomplete_cholesky\n");
311 
312   ierr = PetscMemzero((void*) val, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
313   ierr = PetscMemzero((void*) lvec, nrows*sizeof(PetscScalar));CHKERRQ(ierr);
314   ierr = PetscMemzero((void*) max_row_nnz, nrows*sizeof(int));CHKERRQ(ierr);
315 
316   /* Check correct format of sparseness pattern */
317   if (pattern.col_idx_type != SPBAS_DIAGONAL_OFFSETS)  SETERRQ(PETSC_COMM_SELF, PETSC_ERR_SUP_SYS, "Error in spbas_incomplete_cholesky: must have diagonal offsets in pattern\n");
318 
319   /* Count the nonzeros on transpose of pattern */
320   for (i = 0; i<nrows; i++)  {
321     p_nnz  = pattern.row_nnz[i];
322     p_icol = pattern.icols[i];
323     for (j=0; j<p_nnz; j++)  {
324         max_row_nnz[i+p_icol[j]]++;
325     }
326   }
327 
328   /* Calculate rows of L */
329   for (i = 0; i<nrows; i++)  {
330     p_nnz  = pattern.row_nnz[i];
331     p_icol = pattern.icols[i];
332 
333     r_nnz  = retval.row_nnz[i];
334     r_icol = retval.icols[i];
335     r_val  = retval.values[i];
336     A_nnz  = ai[rip[i]+1] - ai[rip[i]];
337     A_icol = &aj[ai[rip[i]]];
338     A_val  = &aa[ai[rip[i]]];
339 
340     /* Calculate  val += A(i,i:n)'; */
341     for (j=0; j<A_nnz; j++) {
342       k = riip[A_icol[j]];
343       if (k>=i) { val[k] = A_val[j]; }
344     }
345 
346     /*  Add regularization */
347     val[i] += epsdiag;
348 
349     /* Calculate lvec   = diag(D(0:i-1)) * L(0:i-1,i);
350         val(i) = A(i,i) - L(0:i-1,i)' * lvec */
351     for (j=0; j<r_nnz; j++)  {
352       k           = r_icol[j];
353       lvec[k]     = diag[k] * r_val[j];
354       val[i]     -= r_val[j] * lvec[k];
355     }
356 
357     /* Calculate the new diagonal */
358     diag[i] = val[i];
359     if (PetscRealPart(diag[i])<droptol)  {
360       ierr = PetscInfo(PETSC_NULL,"Error in spbas_incomplete_cholesky:\n");
361       ierr = PetscInfo1(PETSC_NULL,"Negative diagonal in row %d\n",i+1);
362 
363       /* Delete the whole matrix at once. */
364       ierr = spbas_delete(retval);
365       return NEGATIVE_DIAGONAL;
366     }
367 
368     /* If necessary, allocate arrays */
369     if (r_nnz==0) {
370       ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);CHKERRQ(ierr);
371       if (ierr == PETSC_ERR_MEM) {
372         ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
373         ierr = spbas_cholesky_row_alloc(retval, i, 1, &n_alloc_used);
374       }
375       r_icol = retval.icols[i];
376       r_val  = retval.values[i];
377     }
378 
379     /* Now, fill in */
380     r_icol[r_nnz] = i;
381     r_val [r_nnz] = 1.0;
382     r_nnz++;
383     retval.row_nnz[i]++;
384 
385     retval.nnz += r_nnz;
386 
387     /* Calculate
388         val(i+1:n) = (A(i,i+1:n)- L(0:i-1,i+1:n)' * lvec)/diag(i) */
389     for (j=1; j<p_nnz; j++)  {
390       k = i+p_icol[j];
391       r1_icol = retval.icols[k];
392       r1_val  = retval.values[k];
393       for (jL=0; jL<retval.row_nnz[k]; jL++)   {
394         val[k] -= r1_val[jL] * lvec[r1_icol[jL]];
395       }
396       val[k] /= diag[i];
397 
398       if (PetscAbsScalar(val[k]) > droptol || PetscAbsScalar(val[k])< -droptol) {
399         /* If necessary, allocate arrays */
400         if (retval.row_nnz[k]==0) {
401           ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);
402           if (ierr == PETSC_ERR_MEM) {
403             ierr = spbas_cholesky_garbage_collect(&retval,  i, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
404             ierr = spbas_cholesky_row_alloc(retval, k, max_row_nnz[k], &n_alloc_used);CHKERRQ(ierr);
405             r_icol = retval.icols[i];
406             r_val  = retval.values[i];
407           }
408         }
409 
410         retval.icols[k][retval.row_nnz[k]] = i;
411         retval.values[k][retval.row_nnz[k]] = val[k];
412         retval.row_nnz[k]++;
413       }
414       val[k] = 0;
415     }
416 
417     /* Erase the values used in the work arrays */
418     for (j=0; j<r_nnz; j++) { lvec[r_icol[j]] = 0; }
419   }
420 
421   ierr=PetscFree(lvec);CHKERRQ(ierr);
422   ierr=PetscFree(val);CHKERRQ(ierr);
423 
424   ierr = spbas_cholesky_garbage_collect(&retval, nrows, &n_row_alloc_ok, &n_alloc_used, max_row_nnz);CHKERRQ(ierr);
425   ierr=PetscFree(max_row_nnz);CHKERRQ(ierr);
426 
427   /* Place the inverse of the diagonals in the matrix */
428   for (i=0; i<nrows; i++) {
429     r_nnz = retval.row_nnz[i];
430     retval.values[i][r_nnz-1] = 1.0 / diag[i];
431     for (j=0; j<r_nnz-1; j++) {
432         retval.values[i][j] *= -1;
433     }
434   }
435   ierr=PetscFree(diag);CHKERRQ(ierr);
436   *matrix_L = retval;
437   PetscFunctionReturn(0);
438 }
439 
440