xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision 5fd668637986a8d8518383a9159eebc368e1d5b4)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 
21 #include <petsc-private/pcimpl.h>        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
29 #include <spai.h>
30 #include <matrix.h>
31 EXTERN_C_END
32 
33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36 extern PetscErrorCode MM_to_PETSC(char *,char *,char *);
37 
/* Private context for the SPAI preconditioner; created in PCCreate_SPAI()
   and freed in PCDestroy_SPAI(). */
typedef struct {

  matrix   *B;              /* matrix in SPAI format */
  matrix   *BT;             /* transpose of matrix in SPAI format */
  matrix   *M;              /* the approximate inverse in SPAI format */

  Mat      PM;              /* the approximate inverse PETSc format */

  double   epsilon;         /* tolerance */
  int      nbsteps;         /* max number of "improvement" steps per line */
  int      max;             /* max dimensions of is_I, q, etc. */
  int      maxnew;          /* max number of new entries per step */
  int      block_size;      /* constant block size */
  int      cache_size;      /* one of (1,2,3,4,5,6) indicating size of cache */
  int      verbose;         /* SPAI prints timing and statistics */

  int      sp;              /* symmetric nonzero pattern */
  MPI_Comm comm_spai;     /* communicator to be used with spai */
} PC_SPAI;
57 
58 /**********************************************************************/
59 
#undef __FUNCT__
#define __FUNCT__ "PCSetUp_SPAI"
/*
   PCSetUp_SPAI - Builds the SPAI approximate inverse.

   Converts the PETSc preconditioning matrix pc->pmat into SPAI's own sparse
   format, runs the SPAI algorithm (bspai), and converts the resulting
   approximate inverse back to a PETSc Mat (ispai->PM) for use by
   PCApply_SPAI().  The intermediate SPAI-format matrices are freed here.
*/
static PetscErrorCode PCSetUp_SPAI(PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  Mat            AT;

  PetscFunctionBegin;
  init_SPAI();

  if (ispai->sp) {
    /* symmetric nonzero pattern: the row structure doubles as the column structure */
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
  } else {
    /* Use the transpose to get the column nonzero structure. */
    ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
    ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
    /* the PETSc-side transpose is only needed during the conversion above */
    ierr = MatDestroy(&AT);CHKERRQ(ierr);
  }

  /* construct SPAI preconditioner */
  /* FILE *messages */     /* file for warning messages */
  /* double epsilon */     /* tolerance */
  /* int nbsteps */        /* max number of "improvement" steps per line */
  /* int max */            /* max dimensions of is_I, q, etc. */
  /* int maxnew */         /* max number of new entries per step */
  /* int block_size */     /* block_size == 1 specifies scalar elments
                              block_size == n specifies nxn constant-block elements
                              block_size == 0 specifies variable-block elements */
  /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
                           /* cache_size == 0 indicates no caching */
  /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
                              verbose == 1 prints timing and matrix statistics */

  /* NOTE(review): warning messages go to stdout; there is no way to redirect them */
  ierr = bspai(ispai->B,&ispai->M,
               stdout,
               ispai->epsilon,
               ispai->nbsteps,
               ispai->max,
               ispai->maxnew,
               ispai->block_size,
               ispai->cache_size,
               ispai->verbose);CHKERRQ(ierr);

  ierr = ConvertMatrixToMat(((PetscObject)pc)->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);

  /* free the SPAI matrices */
  sp_free_matrix(ispai->B);
  sp_free_matrix(ispai->M);

  PetscFunctionReturn(0);
}
115 
116 /**********************************************************************/
117 
118 #undef __FUNCT__
119 #define __FUNCT__ "PCApply_SPAI"
120 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
121 {
122   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
123   PetscErrorCode ierr;
124 
125   PetscFunctionBegin;
126   /* Now using PETSc's multiply */
127   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
128   PetscFunctionReturn(0);
129 }
130 
131 /**********************************************************************/
132 
#undef __FUNCT__
#define __FUNCT__ "PCDestroy_SPAI"
/*
   PCDestroy_SPAI - Frees everything owned by the PC_SPAI context:
   the PETSc copy of the approximate inverse, the communicator duplicated
   in PCCreate_SPAI(), and the context structure itself.
*/
static PetscErrorCode PCDestroy_SPAI(PC pc)
{
  PetscErrorCode ierr;
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;

  PetscFunctionBegin;
  ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
  ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
  ierr = PetscFree(pc->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
146 
147 /**********************************************************************/
148 
#undef __FUNCT__
#define __FUNCT__ "PCView_SPAI"
/*
   PCView_SPAI - Prints the SPAI parameter settings; only ASCII viewers
   are supported (other viewer types are silently ignored).
*/
static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  PetscBool      iascii;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
    ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
172 
173 EXTERN_C_BEGIN
174 #undef __FUNCT__
175 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
176 PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
177 {
178   PC_SPAI *ispai = (PC_SPAI*)pc->data;
179 
180   PetscFunctionBegin;
181   ispai->epsilon = epsilon1;
182   PetscFunctionReturn(0);
183 }
184 EXTERN_C_END
185 
186 /**********************************************************************/
187 
188 EXTERN_C_BEGIN
189 #undef __FUNCT__
190 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
191 PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
192 {
193   PC_SPAI *ispai = (PC_SPAI*)pc->data;
194 
195   PetscFunctionBegin;
196   ispai->nbsteps = nbsteps1;
197   PetscFunctionReturn(0);
198 }
199 EXTERN_C_END
200 
201 /**********************************************************************/
202 
203 /* added 1/7/99 g.h. */
204 EXTERN_C_BEGIN
205 #undef __FUNCT__
206 #define __FUNCT__ "PCSPAISetMax_SPAI"
207 PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
208 {
209   PC_SPAI *ispai = (PC_SPAI*)pc->data;
210 
211   PetscFunctionBegin;
212   ispai->max = max1;
213   PetscFunctionReturn(0);
214 }
215 EXTERN_C_END
216 
217 /**********************************************************************/
218 
219 EXTERN_C_BEGIN
220 #undef __FUNCT__
221 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
222 PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
223 {
224   PC_SPAI *ispai = (PC_SPAI*)pc->data;
225 
226   PetscFunctionBegin;
227   ispai->maxnew = maxnew1;
228   PetscFunctionReturn(0);
229 }
230 EXTERN_C_END
231 
232 /**********************************************************************/
233 
234 EXTERN_C_BEGIN
235 #undef __FUNCT__
236 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
237 PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
238 {
239   PC_SPAI *ispai = (PC_SPAI*)pc->data;
240 
241   PetscFunctionBegin;
242   ispai->block_size = block_size1;
243   PetscFunctionReturn(0);
244 }
245 EXTERN_C_END
246 
247 /**********************************************************************/
248 
249 EXTERN_C_BEGIN
250 #undef __FUNCT__
251 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
252 PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
253 {
254   PC_SPAI *ispai = (PC_SPAI*)pc->data;
255 
256   PetscFunctionBegin;
257   ispai->cache_size = cache_size;
258   PetscFunctionReturn(0);
259 }
260 EXTERN_C_END
261 
262 /**********************************************************************/
263 
264 EXTERN_C_BEGIN
265 #undef __FUNCT__
266 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
267 PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
268 {
269   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
270 
271   PetscFunctionBegin;
272   ispai->verbose = verbose;
273   PetscFunctionReturn(0);
274 }
275 EXTERN_C_END
276 
277 /**********************************************************************/
278 
279 EXTERN_C_BEGIN
280 #undef __FUNCT__
281 #define __FUNCT__ "PCSPAISetSp_SPAI"
282 PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
283 {
284   PC_SPAI *ispai = (PC_SPAI*)pc->data;
285 
286   PetscFunctionBegin;
287   ispai->sp = sp;
288   PetscFunctionReturn(0);
289 }
290 EXTERN_C_END
291 
292 /* -------------------------------------------------------------------*/
293 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetEpsilon"
/*@
  PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- eps - epsilon (default .4)

  Notes:  Epsilon must be between 0 and 1. It controls the
                 quality of the approximation of M to the inverse of
                 A. Higher values of epsilon lead to more work, more
                 fill, and usually better preconditioners. In many
                 cases the best choice of epsilon is the one that
                 divides the total solution time equally between the
                 preconditioner and the solver.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forwards to PCSPAISetEpsilon_SPAI(); a no-op for non-SPAI PC types */
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
323 
324 /**********************************************************************/
325 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetNBSteps"
/*@
  PCSPAISetNBSteps - set maximum number of improvement steps per row in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - number of steps (default 5)

  Notes:  SPAI constructs an approximation to every column of
                 the exact inverse of A in a series of improvement
                 steps. The quality of the approximation is determined
                 by epsilon. If an approximation achieving an accuracy
                 of epsilon is not obtained after ns steps, SPAI simply
                 uses the best approximation constructed so far.

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
@*/
PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forwards to PCSPAISetNBSteps_SPAI(); a no-op for non-SPAI PC types */
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
355 
356 /**********************************************************************/
357 
/* added 1/7/99 g.h. */
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMax"
/*@
  PCSPAISetMax - set the size of various working buffers in
        the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - size (default is 5000)

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forwards to PCSPAISetMax_SPAI(); a no-op for non-SPAI PC types */
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
381 
382 /**********************************************************************/
383 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetMaxNew"
/*@
  PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
   in SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - maximum number (default 5)

  Level: intermediate

.seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
@*/
PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forwards to PCSPAISetMaxNew_SPAI(); a no-op for non-SPAI PC types.
     Requires PCCreate_SPAI() to have composed "PCSPAISetMaxNew_C". */
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
406 
407 /**********************************************************************/
408 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetBlockSize"
/*@
  PCSPAISetBlockSize - set the block size for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - block size (default 1)

  Notes: A block
                 size of 1 treats A as a matrix of scalar elements. A
                 block size of s > 1 treats A as a matrix of sxs
                 blocks. A block size of 0 treats A as a matrix with
                 variable sized blocks, which are determined by
                 searching for dense square diagonal blocks in A.
                 This can be very effective for finite-element
                 matrices.

                 SPAI will convert A to block form, use a block
                 version of the preconditioner algorithm, and then
                 convert the result back to scalar form.

                 In many cases a block-size parameter other than 1
                 can lead to very significant improvement in
                 performance.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forwards to PCSPAISetBlockSize_SPAI(); a no-op for non-SPAI PC types */
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
448 
449 /**********************************************************************/
450 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetCacheSize"
/*@
  PCSPAISetCacheSize - specify cache size in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n -  cache size {0,1,2,3,4,5} (default 5)

  Notes:    SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
                 5. This parameter is irrelevant in the serial
                 version.

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forwards to PCSPAISetCacheSize_SPAI(); a no-op for non-SPAI PC types */
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
477 
478 /**********************************************************************/
479 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetVerbose"
/*@
  PCSPAISetVerbose - verbosity level for the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - level (default 1)

  Notes: print parameters, timings and matrix statistics

  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forwards to PCSPAISetVerbose_SPAI(); a no-op for non-SPAI PC types */
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
503 
504 /**********************************************************************/
505 
#undef __FUNCT__
#define __FUNCT__ "PCSPAISetSp"
/*@
  PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner

  Input Parameters:
+ pc - the preconditioner
- n - 0 or 1

  Notes: If A has a symmetric nonzero pattern use -sp 1 to
                 improve performance by eliminating some communication
                 in the parallel version. Even if A does not have a
                 symmetric nonzero pattern -sp 1 may well lead to good
                 results, but the code will not follow the published
                 SPAI algorithm exactly.


  Level: intermediate

.seealso: PCSPAI, PCSetType()
@*/
PetscErrorCode  PCSPAISetSp(PC pc,int sp)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* forwards to PCSPAISetSp_SPAI(); a no-op for non-SPAI PC types */
  ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
535 
536 /**********************************************************************/
537 
538 /**********************************************************************/
539 
#undef __FUNCT__
#define __FUNCT__ "PCSetFromOptions_SPAI"
/*
   PCSetFromOptions_SPAI - Reads the -pc_spai_* options from the options
   database and applies any that were set via the corresponding public
   PCSPAISetXXX() routines.
*/
static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
{
  PC_SPAI        *ispai = (PC_SPAI*)pc->data;
  PetscErrorCode ierr;
  int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
  double         epsilon1;
  PetscBool      flg;     /* set if the corresponding option was given */

  PetscFunctionBegin;
  ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
    ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
    }
    ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
    }
    /* added 1/7/99 g.h. */
    ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
    }
    ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
    }
    ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
    }
    ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
    }
    ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
    }
    ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
    if (flg) {
      ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
    }
  ierr = PetscOptionsTail();CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
588 
589 /**********************************************************************/
590 
591 /*MC
592    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
593      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
594 
595    Options Database Keys:
   Options Database Keys:
+  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - verbose output
604 
605    Notes: This only works with AIJ matrices.
606 
607    Level: beginner
608 
609    Concepts: approximate inverse
610 
611 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
612     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
613     PCSPAISetVerbose(), PCSPAISetSp()
614 M*/
615 
616 EXTERN_C_BEGIN
617 #undef __FUNCT__
618 #define __FUNCT__ "PCCreate_SPAI"
619 PetscErrorCode  PCCreate_SPAI(PC pc)
620 {
621   PC_SPAI        *ispai;
622   PetscErrorCode ierr;
623 
624   PetscFunctionBegin;
625   ierr               = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
626   pc->data           = ispai;
627 
628   pc->ops->destroy         = PCDestroy_SPAI;
629   pc->ops->apply           = PCApply_SPAI;
630   pc->ops->applyrichardson = 0;
631   pc->ops->setup           = PCSetUp_SPAI;
632   pc->ops->view            = PCView_SPAI;
633   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
634 
635   ispai->epsilon    = .4;
636   ispai->nbsteps    = 5;
637   ispai->max        = 5000;
638   ispai->maxnew     = 5;
639   ispai->block_size = 1;
640   ispai->cache_size = 5;
641   ispai->verbose    = 0;
642 
643   ispai->sp         = 1;
644   ierr = MPI_Comm_dup(((PetscObject)pc)->comm,&(ispai->comm_spai));CHKERRQ(ierr);
645 
646   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
647                     "PCSPAISetEpsilon_SPAI",
648                      PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
649   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
650                     "PCSPAISetNBSteps_SPAI",
651                      PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
652   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
653                     "PCSPAISetMax_SPAI",
654                      PCSPAISetMax_SPAI);CHKERRQ(ierr);
655   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
656                     "PCSPAISetMaxNew_SPAI",
657                      PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
659                     "PCSPAISetBlockSize_SPAI",
660                      PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
661   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
662                     "PCSPAISetCacheSize_SPAI",
663                      PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
664   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
665                     "PCSPAISetVerbose_SPAI",
666                      PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
667   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
668                     "PCSPAISetSp_SPAI",
669                      PCSPAISetSp_SPAI);CHKERRQ(ierr);
670 
671   PetscFunctionReturn(0);
672 }
673 EXTERN_C_END
674 
675 /**********************************************************************/
676 
677 /*
678    Converts from a PETSc matrix to an SPAI matrix
679 */
680 #undef __FUNCT__
681 #define __FUNCT__ "ConvertMatToMatrix"
682 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
683 {
684   matrix                  *M;
685   int                     i,j,col;
686   int                     row_indx;
687   int                     len,pe,local_indx,start_indx;
688   int                     *mapping;
689   PetscErrorCode          ierr;
690   const int               *cols;
691   const double            *vals;
692   int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
693   PetscMPIInt             size,rank;
694   struct compressed_lines *rows;
695 
696   PetscFunctionBegin;
697   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
698   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
699   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
700   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
701 
702   /*
703     not sure why a barrier is required. commenting out
704   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
705   */
706 
707   M = new_matrix((SPAI_Comm)comm);
708 
709   M->n = n;
710   M->bs = 1;
711   M->max_block_size = 1;
712 
713   M->mnls          = (int*)malloc(sizeof(int)*size);
714   M->start_indices = (int*)malloc(sizeof(int)*size);
715   M->pe            = (int*)malloc(sizeof(int)*n);
716   M->block_sizes   = (int*)malloc(sizeof(int)*n);
717   for (i=0; i<n; i++) M->block_sizes[i] = 1;
718 
719   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
720 
721   M->start_indices[0] = 0;
722   for (i=1; i<size; i++) {
723     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
724   }
725 
726   M->mnl = M->mnls[M->myid];
727   M->my_start_index = M->start_indices[M->myid];
728 
729   for (i=0; i<size; i++) {
730     start_indx = M->start_indices[i];
731     for (j=0; j<M->mnls[i]; j++)
732       M->pe[start_indx+j] = i;
733   }
734 
735   if (AT) {
736     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
737   } else {
738     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
739   }
740 
741   rows = M->lines;
742 
743   /* Determine the mapping from global indices to pointers */
744   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
745   pe         = 0;
746   local_indx = 0;
747   for (i=0; i<M->n; i++) {
748     if (local_indx >= M->mnls[pe]) {
749       pe++;
750       local_indx = 0;
751     }
752     mapping[i] = local_indx + M->start_indices[pe];
753     local_indx++;
754   }
755 
756 
757   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
758 
759   /*********************************************************/
760   /************** Set up the row structure *****************/
761   /*********************************************************/
762 
763   /* count number of nonzeros in every row */
764   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
765   for (i=rstart; i<rend; i++) {
766     ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
767     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
768   }
769 
770   /* allocate buffers */
771   len = 0;
772   for (i=0; i<mnl; i++) {
773     if (len < num_ptr[i]) len = num_ptr[i];
774   }
775 
776   for (i=rstart; i<rend; i++) {
777     row_indx             = i-rstart;
778     len                  = num_ptr[row_indx];
779     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
780     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
781   }
782 
783   /* copy the matrix */
784   for (i=rstart; i<rend; i++) {
785     row_indx = i - rstart;
786     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
787     for (j=0; j<nz; j++) {
788       col = cols[j];
789       len = rows->len[row_indx]++;
790       rows->ptrs[row_indx][len] = mapping[col];
791       rows->A[row_indx][len]    = vals[j];
792     }
793     rows->slen[row_indx] = rows->len[row_indx];
794     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
795   }
796 
797 
798   /************************************************************/
799   /************** Set up the column structure *****************/
800   /*********************************************************/
801 
802   if (AT) {
803 
804     /* count number of nonzeros in every column */
805     for (i=rstart; i<rend; i++) {
806       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
807       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
808     }
809 
810     /* allocate buffers */
811     len = 0;
812     for (i=0; i<mnl; i++) {
813       if (len < num_ptr[i]) len = num_ptr[i];
814     }
815 
816     for (i=rstart; i<rend; i++) {
817       row_indx = i-rstart;
818       len      = num_ptr[row_indx];
819       rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
820     }
821 
822     /* copy the matrix (i.e., the structure) */
823     for (i=rstart; i<rend; i++) {
824       row_indx = i - rstart;
825       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
826       for (j=0; j<nz; j++) {
827         col = cols[j];
828         len = rows->rlen[row_indx]++;
829         rows->rptrs[row_indx][len] = mapping[col];
830       }
831       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
832     }
833   }
834 
835   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
836   ierr = PetscFree(mapping);CHKERRQ(ierr);
837 
838   order_pointers(M);
839   M->maxnz = calc_maxnz(M);
840 
841   *B = M;
842 
843   PetscFunctionReturn(0);
844 }
845 
846 /**********************************************************************/
847 
848 /*
849    Converts from an SPAI matrix B  to a PETSc matrix PB.
850    This assumes that the the SPAI matrix B is stored in
851    COMPRESSED-ROW format.
852 */
853 #undef __FUNCT__
854 #define __FUNCT__ "ConvertMatrixToMat"
855 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
856 {
857   PetscMPIInt    size,rank;
858   PetscErrorCode ierr;
859   int            m,n,M,N;
860   int            d_nz,o_nz;
861   int            *d_nnz,*o_nnz;
862   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
863   PetscScalar    val;
864 
865   PetscFunctionBegin;
866   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
867   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
868 
869   m = n = B->mnls[rank];
870   d_nz = o_nz = 0;
871 
872   /* Determine preallocation for MatCreateMPIAIJ */
873   ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
874   ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
875   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
876   first_diag_col = B->start_indices[rank];
877   last_diag_col = first_diag_col + B->mnls[rank];
878   for (i=0; i<B->mnls[rank]; i++) {
879     for (k=0; k<B->lines->len[i]; k++) {
880       global_col = B->lines->ptrs[i][k];
881       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
882       else o_nnz[i]++;
883     }
884   }
885 
886   M = N = B->n;
887   /* Here we only know how to create AIJ format */
888   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
889   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
890   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
891   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
892   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
893 
894   for (i=0; i<B->mnls[rank]; i++) {
895     global_row = B->start_indices[rank]+i;
896     for (k=0; k<B->lines->len[i]; k++) {
897       global_col = B->lines->ptrs[i][k];
898       val = B->lines->A[i][k];
899       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
900     }
901   }
902 
903   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
904   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
905 
906   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
907   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
908 
909   PetscFunctionReturn(0);
910 }
911 
912 /**********************************************************************/
913 
914 /*
915    Converts from an SPAI vector v  to a PETSc vec Pv.
916 */
917 #undef __FUNCT__
918 #define __FUNCT__ "ConvertVectorToVec"
919 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
920 {
921   PetscErrorCode ierr;
922   PetscMPIInt    size,rank;
923   int            m,M,i,*mnls,*start_indices,*global_indices;
924 
925   PetscFunctionBegin;
926   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
927   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
928 
929   m = v->mnl;
930   M = v->n;
931 
932 
933   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
934 
935   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
936   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
937 
938   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
939   start_indices[0] = 0;
940   for (i=1; i<size; i++)
941     start_indices[i] = start_indices[i-1] +mnls[i-1];
942 
943   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
944   for (i=0; i<v->mnl; i++)
945     global_indices[i] = start_indices[rank] + i;
946 
947   ierr = PetscFree(mnls);CHKERRQ(ierr);
948   ierr = PetscFree(start_indices);CHKERRQ(ierr);
949 
950   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
951   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
952   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
953 
954   ierr = PetscFree(global_indices);CHKERRQ(ierr);
955   PetscFunctionReturn(0);
956 }
957 
958 
959 
960 
961