xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision 01a81e61ce6df5eae5e8bb0a164ea9f70eac2b9c)
1 #define PETSCKSP_DLL
2 
3 /*
4    3/99 Modified by Stephen Barnard to support SPAI version 3.0
5 */
6 
7 /*
8       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
9    Code written by Stephen Barnard.
10 
11       Note: there is some BAD memory bleeding below!
12 
13       This code needs work
14 
15    1) get rid of all memory bleeding
16    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
17       rather than having the sp flag for PC_SPAI
18 
19 */
20 
21 #include "src/ksp/pc/pcimpl.h"        /*I "petscpc.h" I*/
22 
23 /*
24     These are the SPAI include files
25 */
26 EXTERN_C_BEGIN
27 #include "spai.h"
28 #include "matrix.h"
29 EXTERN_C_END
30 
31 EXTERN PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
32 EXTERN PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix *,Mat *);
33 EXTERN PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
34 EXTERN PetscErrorCode MM_to_PETSC(char *,char *,char *);
35 
36 typedef struct {
37 
38   matrix *B;              /* matrix in SPAI format */
39   matrix *BT;             /* transpose of matrix in SPAI format */
40   matrix *M;              /* the approximate inverse in SPAI format */
41 
42   Mat    PM;              /* the approximate inverse PETSc format */
43 
44   double epsilon;         /* tolerance */
45   int    nbsteps;         /* max number of "improvement" steps per line */
46   int    max;             /* max dimensions of is_I, q, etc. */
47   int    maxnew;          /* max number of new entries per step */
48   int    block_size;      /* constant block size */
49   int    cache_size;      /* one of (1,2,3,4,5,6) indicting size of cache */
50   int    verbose;         /* SPAI prints timing and statistics */
51 
52   int    sp;              /* symmetric nonzero pattern */
53   MPI_Comm comm_spai;     /* communicator to be used with spai */
54 } PC_SPAI;
55 
56 /**********************************************************************/
57 
58 #undef __FUNCT__
59 #define __FUNCT__ "PCSetUp_SPAI"
60 static PetscErrorCode PCSetUp_SPAI(PC pc)
61 {
62   PC_SPAI *ispai = (PC_SPAI*)pc->data;
63   PetscErrorCode ierr;
64   Mat      AT;
65 
66   PetscFunctionBegin;
67 
68   init_SPAI();
69 
70   if (ispai->sp) {
71     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
72   } else {
73     /* Use the transpose to get the column nonzero structure. */
74     ierr = MatTranspose(pc->pmat,&AT);CHKERRQ(ierr);
75     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
76     ierr = MatDestroy(AT);CHKERRQ(ierr);
77   }
78 
79   /* Destroy the transpose */
80   /* Don't know how to do it. PETSc developers? */
81 
82   /* construct SPAI preconditioner */
83   /* FILE *messages */     /* file for warning messages */
84   /* double epsilon */     /* tolerance */
85   /* int nbsteps */        /* max number of "improvement" steps per line */
86   /* int max */            /* max dimensions of is_I, q, etc. */
87   /* int maxnew */         /* max number of new entries per step */
88   /* int block_size */     /* block_size == 1 specifies scalar elments
89                               block_size == n specifies nxn constant-block elements
90                               block_size == 0 specifies variable-block elements */
91   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache */
92                            /* cache_size == 0 indicates no caching */
93   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
94                               verbose == 1 prints timing and matrix statistics */
95 
96   ierr = bspai(ispai->B,&ispai->M,
97 		   stdout,
98 		   ispai->epsilon,
99 		   ispai->nbsteps,
100 		   ispai->max,
101 		   ispai->maxnew,
102 		   ispai->block_size,
103 		   ispai->cache_size,
104 	       ispai->verbose); CHKERRQ(ierr);
105 
106   ierr = ConvertMatrixToMat(pc->comm,ispai->M,&ispai->PM);CHKERRQ(ierr);
107 
108   /* free the SPAI matrices */
109   sp_free_matrix(ispai->B);
110   sp_free_matrix(ispai->M);
111 
112   PetscFunctionReturn(0);
113 }
114 
115 /**********************************************************************/
116 
117 #undef __FUNCT__
118 #define __FUNCT__ "PCApply_SPAI"
119 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
120 {
121   PC_SPAI *ispai = (PC_SPAI*)pc->data;
122   PetscErrorCode ierr;
123 
124   PetscFunctionBegin;
125   /* Now using PETSc's multiply */
126   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
127   PetscFunctionReturn(0);
128 }
129 
130 /**********************************************************************/
131 
132 #undef __FUNCT__
133 #define __FUNCT__ "PCDestroy_SPAI"
134 static PetscErrorCode PCDestroy_SPAI(PC pc)
135 {
136   PetscErrorCode ierr;
137   PC_SPAI *ispai = (PC_SPAI*)pc->data;
138 
139   PetscFunctionBegin;
140   if (ispai->PM) {ierr = MatDestroy(ispai->PM);CHKERRQ(ierr);}
141   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
142   ierr = PetscFree(ispai);CHKERRQ(ierr);
143   PetscFunctionReturn(0);
144 }
145 
146 /**********************************************************************/
147 
148 #undef __FUNCT__
149 #define __FUNCT__ "PCView_SPAI"
150 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
151 {
152   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
153   PetscErrorCode ierr;
154   PetscTruth iascii;
155 
156   PetscFunctionBegin;
157   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
158   if (iascii) {
159     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
160     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %g\n",   ispai->epsilon);CHKERRQ(ierr);
161     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
167     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
168 
169   }
170   PetscFunctionReturn(0);
171 }
172 
173 EXTERN_C_BEGIN
174 #undef __FUNCT__
175 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
176 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
177 {
178   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
179   PetscFunctionBegin;
180   ispai->epsilon = epsilon1;
181   PetscFunctionReturn(0);
182 }
183 EXTERN_C_END
184 
185 /**********************************************************************/
186 
187 EXTERN_C_BEGIN
188 #undef __FUNCT__
189 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
190 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
191 {
192   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
193   PetscFunctionBegin;
194   ispai->nbsteps = nbsteps1;
195   PetscFunctionReturn(0);
196 }
197 EXTERN_C_END
198 
199 /**********************************************************************/
200 
201 /* added 1/7/99 g.h. */
202 EXTERN_C_BEGIN
203 #undef __FUNCT__
204 #define __FUNCT__ "PCSPAISetMax_SPAI"
205 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax_SPAI(PC pc,int max1)
206 {
207   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
208   PetscFunctionBegin;
209   ispai->max = max1;
210   PetscFunctionReturn(0);
211 }
212 EXTERN_C_END
213 
214 /**********************************************************************/
215 
216 EXTERN_C_BEGIN
217 #undef __FUNCT__
218 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
219 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
220 {
221   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
222   PetscFunctionBegin;
223   ispai->maxnew = maxnew1;
224   PetscFunctionReturn(0);
225 }
226 EXTERN_C_END
227 
228 /**********************************************************************/
229 
230 EXTERN_C_BEGIN
231 #undef __FUNCT__
232 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
233 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
234 {
235   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
236   PetscFunctionBegin;
237   ispai->block_size = block_size1;
238   PetscFunctionReturn(0);
239 }
240 EXTERN_C_END
241 
242 /**********************************************************************/
243 
244 EXTERN_C_BEGIN
245 #undef __FUNCT__
246 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
247 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
248 {
249   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
250   PetscFunctionBegin;
251   ispai->cache_size = cache_size;
252   PetscFunctionReturn(0);
253 }
254 EXTERN_C_END
255 
256 /**********************************************************************/
257 
258 EXTERN_C_BEGIN
259 #undef __FUNCT__
260 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
261 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose_SPAI(PC pc,int verbose)
262 {
263   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
264   PetscFunctionBegin;
265   ispai->verbose = verbose;
266   PetscFunctionReturn(0);
267 }
268 EXTERN_C_END
269 
270 /**********************************************************************/
271 
272 EXTERN_C_BEGIN
273 #undef __FUNCT__
274 #define __FUNCT__ "PCSPAISetSp_SPAI"
275 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp_SPAI(PC pc,int sp)
276 {
277   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
278   PetscFunctionBegin;
279   ispai->sp = sp;
280   PetscFunctionReturn(0);
281 }
282 EXTERN_C_END
283 
284 /* -------------------------------------------------------------------*/
285 
286 #undef __FUNCT__
287 #define __FUNCT__ "PCSPAISetEpsilon"
288 /*@
289   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
290 
291   Input Parameters:
292 + pc - the preconditioner
293 - eps - epsilon (default .4)
294 
295   Notes:  Espilon must be between 0 and 1. It controls the
296                  quality of the approximation of M to the inverse of
297                  A. Higher values of epsilon lead to more work, more
298                  fill, and usually better preconditioners. In many
299                  cases the best choice of epsilon is the one that
300                  divides the total solution time equally between the
301                  preconditioner and the solver.
302 
303   Level: intermediate
304 
305 .seealso: PCSPAI, PCSetType()
306   @*/
307 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetEpsilon(PC pc,double epsilon1)
308 {
309   PetscErrorCode ierr,(*f)(PC,double);
310   PetscFunctionBegin;
311   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetEpsilon_C",(void (**)(void))&f);CHKERRQ(ierr);
312   if (f) {
313     ierr = (*f)(pc,epsilon1);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 /**********************************************************************/
319 
320 #undef __FUNCT__
321 #define __FUNCT__ "PCSPAISetNBSteps"
322 /*@
323   PCSPAISetNBSteps - set maximum number of improvement steps per row in
324         the SPAI preconditioner
325 
326   Input Parameters:
327 + pc - the preconditioner
328 - n - number of steps (default 5)
329 
330   Notes:  SPAI constructs to approximation to every column of
331                  the exact inverse of A in a series of improvement
332                  steps. The quality of the approximation is determined
333                  by epsilon. If an approximation achieving an accuracy
334                  of epsilon is not obtained after ns steps, SPAI simply
335                  uses the best approximation constructed so far.
336 
337   Level: intermediate
338 
339 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
340 @*/
341 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetNBSteps(PC pc,int nbsteps1)
342 {
343   PetscErrorCode ierr,(*f)(PC,int);
344   PetscFunctionBegin;
345   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetNBSteps_C",(void (**)(void))&f);CHKERRQ(ierr);
346   if (f) {
347     ierr = (*f)(pc,nbsteps1);CHKERRQ(ierr);
348   }
349   PetscFunctionReturn(0);
350 }
351 
352 /**********************************************************************/
353 
354 /* added 1/7/99 g.h. */
355 #undef __FUNCT__
356 #define __FUNCT__ "PCSPAISetMax"
357 /*@
358   PCSPAISetMax - set the size of various working buffers in
359         the SPAI preconditioner
360 
361   Input Parameters:
362 + pc - the preconditioner
363 - n - size (default is 5000)
364 
365   Level: intermediate
366 
367 .seealso: PCSPAI, PCSetType()
368 @*/
369 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMax(PC pc,int max1)
370 {
371   PetscErrorCode ierr,(*f)(PC,int);
372   PetscFunctionBegin;
373   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMax_C",(void (**)(void))&f);CHKERRQ(ierr);
374   if (f) {
375     ierr = (*f)(pc,max1);CHKERRQ(ierr);
376   }
377   PetscFunctionReturn(0);
378 }
379 
380 /**********************************************************************/
381 
382 #undef __FUNCT__
383 #define __FUNCT__ "PCSPAISetMaxNew"
384 /*@
385   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
386    in SPAI preconditioner
387 
388   Input Parameters:
389 + pc - the preconditioner
390 - n - maximum number (default 5)
391 
392   Level: intermediate
393 
394 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
395 @*/
396 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetMaxNew(PC pc,int maxnew1)
397 {
398   PetscErrorCode ierr,(*f)(PC,int);
399   PetscFunctionBegin;
400   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetMaxNew_C",(void (**)(void))&f);CHKERRQ(ierr);
401   if (f) {
402     ierr = (*f)(pc,maxnew1);CHKERRQ(ierr);
403   }
404   PetscFunctionReturn(0);
405 }
406 
407 /**********************************************************************/
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "PCSPAISetBlockSize"
411 /*@
412   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
413 
414   Input Parameters:
415 + pc - the preconditioner
416 - n - block size (default 1)
417 
418   Notes: A block
419                  size of 1 treats A as a matrix of scalar elements. A
420                  block size of s > 1 treats A as a matrix of sxs
421                  blocks. A block size of 0 treats A as a matrix with
422                  variable sized blocks, which are determined by
423                  searching for dense square diagonal blocks in A.
424                  This can be very effective for finite-element
425                  matrices.
426 
427                  SPAI will convert A to block form, use a block
428                  version of the preconditioner algorithm, and then
429                  convert the result back to scalar form.
430 
431                  In many cases the a block-size parameter other than 1
432                  can lead to very significant improvement in
433                  performance.
434 
435 
436   Level: intermediate
437 
438 .seealso: PCSPAI, PCSetType()
439 @*/
440 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetBlockSize(PC pc,int block_size1)
441 {
442   PetscErrorCode ierr,(*f)(PC,int);
443   PetscFunctionBegin;
444   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetBlockSize_C",(void (**)(void))&f);CHKERRQ(ierr);
445   if (f) {
446     ierr = (*f)(pc,block_size1);CHKERRQ(ierr);
447   }
448   PetscFunctionReturn(0);
449 }
450 
451 /**********************************************************************/
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "PCSPAISetCacheSize"
455 /*@
456   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
457 
458   Input Parameters:
459 + pc - the preconditioner
460 - n -  cache size {0,1,2,3,4,5} (default 5)
461 
462   Notes:    SPAI uses a hash table to cache messages and avoid
463                  redundant communication. If suggest always using
464                  5. This parameter is irrelevant in the serial
465                  version.
466 
467   Level: intermediate
468 
469 .seealso: PCSPAI, PCSetType()
470 @*/
471 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetCacheSize(PC pc,int cache_size)
472 {
473   PetscErrorCode ierr,(*f)(PC,int);
474   PetscFunctionBegin;
475   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetCacheSize_C",(void (**)(void))&f);CHKERRQ(ierr);
476   if (f) {
477     ierr = (*f)(pc,cache_size);CHKERRQ(ierr);
478   }
479   PetscFunctionReturn(0);
480 }
481 
482 /**********************************************************************/
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "PCSPAISetVerbose"
486 /*@
487   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
488 
489   Input Parameters:
490 + pc - the preconditioner
491 - n - level (default 1)
492 
493   Notes: print parameters, timings and matrix statistics
494 
495   Level: intermediate
496 
497 .seealso: PCSPAI, PCSetType()
498 @*/
499 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetVerbose(PC pc,int verbose)
500 {
501   PetscErrorCode ierr,(*f)(PC,int);
502   PetscFunctionBegin;
503   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetVerbose_C",(void (**)(void))&f);CHKERRQ(ierr);
504   if (f) {
505     ierr = (*f)(pc,verbose);CHKERRQ(ierr);
506   }
507   PetscFunctionReturn(0);
508 }
509 
510 /**********************************************************************/
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "PCSPAISetSp"
514 /*@
515   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
516 
517   Input Parameters:
518 + pc - the preconditioner
519 - n - 0 or 1
520 
521   Notes: If A has a symmetric nonzero pattern use -sp 1 to
522                  improve performance by eliminating some communication
523                  in the parallel version. Even if A does not have a
524                  symmetric nonzero pattern -sp 1 may well lead to good
525                  results, but the code will not follow the published
526                  SPAI algorithm exactly.
527 
528 
529   Level: intermediate
530 
531 .seealso: PCSPAI, PCSetType()
532 @*/
533 PetscErrorCode PETSCKSP_DLLEXPORT PCSPAISetSp(PC pc,int sp)
534 {
535   PetscErrorCode ierr,(*f)(PC,int);
536   PetscFunctionBegin;
537   ierr = PetscObjectQueryFunction((PetscObject)pc,"PCSPAISetSp_C",(void (**)(void))&f);CHKERRQ(ierr);
538   if (f) {
539     ierr = (*f)(pc,sp);CHKERRQ(ierr);
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 /**********************************************************************/
545 
546 /**********************************************************************/
547 
548 #undef __FUNCT__
549 #define __FUNCT__ "PCSetFromOptions_SPAI"
550 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
551 {
552   PC_SPAI    *ispai = (PC_SPAI*)pc->data;
553   PetscErrorCode ierr;
554   int nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
555   double     epsilon1;
556   PetscTruth flg;
557 
558   PetscFunctionBegin;
559   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
560     ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
561     if (flg) {
562       ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
563     }
564     ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
565     if (flg) {
566       ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
567     }
568     /* added 1/7/99 g.h. */
569     ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
570     if (flg) {
571       ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
572     }
573     ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
574     if (flg) {
575       ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
576     }
577     ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
578     if (flg) {
579       ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
580     }
581     ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
582     if (flg) {
583       ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
584     }
585     ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
586     if (flg) {
587       ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
588     }
589     ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
590     if (flg) {
591       ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
592     }
593   ierr = PetscOptionsTail();CHKERRQ(ierr);
594   PetscFunctionReturn(0);
595 }
596 
597 /**********************************************************************/
598 
599 /*MC
600    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
601      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
602 
603    Options Database Keys:
604 +  -pc_spai_set_epsilon <eps> - set tolerance
605 .  -pc_spai_nbstep <n> - set nbsteps
606 .  -pc_spai_max <m> - set max
607 .  -pc_spai_max_new <m> - set maxnew
608 .  -pc_spai_block_size <n> - set block size
609 .  -pc_spai_cache_size <n> - set cache size
610 .  -pc_spai_sp <m> - set sp
611 -  -pc_spai_set_verbose <true,false> - verbose output
612 
613    Notes: This only works with AIJ matrices.
614 
615    Level: beginner
616 
617    Concepts: approximate inverse
618 
619 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
620     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
621     PCSPAISetVerbose(), PCSPAISetSp()
622 M*/
623 
624 EXTERN_C_BEGIN
625 #undef __FUNCT__
626 #define __FUNCT__ "PCCreate_SPAI"
627 PetscErrorCode PETSCKSP_DLLEXPORT PCCreate_SPAI(PC pc)
628 {
629   PC_SPAI *ispai;
630   PetscErrorCode ierr;
631 
632   PetscFunctionBegin;
633   ierr               = PetscNew(PC_SPAI,&ispai);CHKERRQ(ierr);
634   pc->data           = (void*)ispai;
635 
636   pc->ops->destroy         = PCDestroy_SPAI;
637   pc->ops->apply           = PCApply_SPAI;
638   pc->ops->applyrichardson = 0;
639   pc->ops->setup           = PCSetUp_SPAI;
640   pc->ops->view            = PCView_SPAI;
641   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
642 
643   pc->name          = 0;
644   ispai->epsilon    = .4;
645   ispai->nbsteps    = 5;
646   ispai->max        = 5000;
647   ispai->maxnew     = 5;
648   ispai->block_size = 1;
649   ispai->cache_size = 5;
650   ispai->verbose    = 0;
651 
652   ispai->sp         = 1;
653   ierr = MPI_Comm_dup(pc->comm,&(ispai->comm_spai));CHKERRQ(ierr);
654 
655   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetEpsilon_C",
656                     "PCSPAISetEpsilon_SPAI",
657                      PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
658   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetNBSteps_C",
659                     "PCSPAISetNBSteps_SPAI",
660                      PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
661   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMax_C",
662                     "PCSPAISetMax_SPAI",
663                      PCSPAISetMax_SPAI);CHKERRQ(ierr);
664   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetMaxNew_CC",
665                     "PCSPAISetMaxNew_SPAI",
666                      PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
667   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetBlockSize_C",
668                     "PCSPAISetBlockSize_SPAI",
669                      PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
670   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetCacheSize_C",
671                     "PCSPAISetCacheSize_SPAI",
672                      PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
673   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetVerbose_C",
674                     "PCSPAISetVerbose_SPAI",
675                      PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)pc,"PCSPAISetSp_C",
677                     "PCSPAISetSp_SPAI",
678                      PCSPAISetSp_SPAI);CHKERRQ(ierr);
679 
680   PetscFunctionReturn(0);
681 }
682 EXTERN_C_END
683 
684 /**********************************************************************/
685 
686 /*
687    Converts from a PETSc matrix to an SPAI matrix
688 */
689 #undef __FUNCT__
690 #define __FUNCT__ "ConvertMatToMatrix"
691 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
692 {
693   matrix                  *M;
694   int                     i,j,col;
695   int                     row_indx;
696   int                     len,pe,local_indx,start_indx;
697   int                     *mapping;
698   PetscErrorCode ierr;
699   const int               *cols;
700   const double            *vals;
701   int                     *num_ptr,n,mnl,nnl,rank,size,nz,rstart,rend;
702   struct compressed_lines *rows;
703 
704   PetscFunctionBegin;
705 
706   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
707   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
708   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
709   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
710 
711   /*
712     not sure why a barrier is required. commenting out
713   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
714   */
715 
716   M = new_matrix((void*)comm);
717 
718   M->n = n;
719   M->bs = 1;
720   M->max_block_size = 1;
721 
722   M->mnls          = (int*)malloc(sizeof(int)*size);
723   M->start_indices = (int*)malloc(sizeof(int)*size);
724   M->pe            = (int*)malloc(sizeof(int)*n);
725   M->block_sizes   = (int*)malloc(sizeof(int)*n);
726   for (i=0; i<n; i++) M->block_sizes[i] = 1;
727 
728   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
729 
730   M->start_indices[0] = 0;
731   for (i=1; i<size; i++) {
732     M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
733   }
734 
735   M->mnl = M->mnls[M->myid];
736   M->my_start_index = M->start_indices[M->myid];
737 
738   for (i=0; i<size; i++) {
739     start_indx = M->start_indices[i];
740     for (j=0; j<M->mnls[i]; j++)
741       M->pe[start_indx+j] = i;
742   }
743 
744   if (AT) {
745     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
746   } else {
747     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
748   }
749 
750   rows     = M->lines;
751 
752   /* Determine the mapping from global indices to pointers */
753   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
754   pe         = 0;
755   local_indx = 0;
756   for (i=0; i<M->n; i++) {
757     if (local_indx >= M->mnls[pe]) {
758       pe++;
759       local_indx = 0;
760     }
761     mapping[i] = local_indx + M->start_indices[pe];
762     local_indx++;
763   }
764 
765 
766   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
767 
768   /*********************************************************/
769   /************** Set up the row structure *****************/
770   /*********************************************************/
771 
772   /* count number of nonzeros in every row */
773   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
774   for (i=rstart; i<rend; i++) {
775     ierr = MatGetRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
776     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
777   }
778 
779   /* allocate buffers */
780   len = 0;
781   for (i=0; i<mnl; i++) {
782     if (len < num_ptr[i]) len = num_ptr[i];
783   }
784 
785   for (i=rstart; i<rend; i++) {
786     row_indx             = i-rstart;
787     len                  = num_ptr[row_indx];
788     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
789     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
790   }
791 
792   /* copy the matrix */
793   for (i=rstart; i<rend; i++) {
794     row_indx = i - rstart;
795     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
796     for (j=0; j<nz; j++) {
797       col = cols[j];
798       len = rows->len[row_indx]++;
799       rows->ptrs[row_indx][len] = mapping[col];
800       rows->A[row_indx][len]    = vals[j];
801     }
802     rows->slen[row_indx] = rows->len[row_indx];
803     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
804   }
805 
806 
807   /************************************************************/
808   /************** Set up the column structure *****************/
809   /*********************************************************/
810 
811   if (AT) {
812 
813     /* count number of nonzeros in every column */
814     for (i=rstart; i<rend; i++) {
815       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
816       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],PETSC_NULL,PETSC_NULL);CHKERRQ(ierr);
817     }
818 
819     /* allocate buffers */
820     len = 0;
821     for (i=0; i<mnl; i++) {
822       if (len < num_ptr[i]) len = num_ptr[i];
823     }
824 
825     for (i=rstart; i<rend; i++) {
826       row_indx = i-rstart;
827       len      = num_ptr[row_indx];
828       rows->rptrs[row_indx]     = (int*)malloc(len*sizeof(int));
829     }
830 
831     /* copy the matrix (i.e., the structure) */
832     for (i=rstart; i<rend; i++) {
833       row_indx = i - rstart;
834       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
835       for (j=0; j<nz; j++) {
836 	col = cols[j];
837 	len = rows->rlen[row_indx]++;
838 	rows->rptrs[row_indx][len] = mapping[col];
839       }
840       ierr     = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
841     }
842   }
843 
844   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
845   ierr = PetscFree(mapping);CHKERRQ(ierr);
846 
847   order_pointers(M);
848   M->maxnz = calc_maxnz(M);
849 
850   *B = M;
851 
852   PetscFunctionReturn(0);
853 }
854 
855 /**********************************************************************/
856 
857 /*
858    Converts from an SPAI matrix B  to a PETSc matrix PB.
859    This assumes that the the SPAI matrix B is stored in
860    COMPRESSED-ROW format.
861 */
862 #undef __FUNCT__
863 #define __FUNCT__ "ConvertMatrixToMat"
864 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
865 {
866   int         size,rank;
867   PetscErrorCode ierr;
868   int         m,n,M,N;
869   int         d_nz,o_nz;
870   int         *d_nnz,*o_nnz;
871   int         i,k,global_row,global_col,first_diag_col,last_diag_col;
872   PetscScalar val;
873 
874   PetscFunctionBegin;
875   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
876   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
877 
878   m = n = B->mnls[rank];
879   d_nz = o_nz = 0;
880 
881   /* Determine preallocation for MatCreateMPIAIJ */
882   ierr = PetscMalloc(m*sizeof(int),&d_nnz);CHKERRQ(ierr);
883   ierr = PetscMalloc(m*sizeof(int),&o_nnz);CHKERRQ(ierr);
884   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
885   first_diag_col = B->start_indices[rank];
886   last_diag_col = first_diag_col + B->mnls[rank];
887   for (i=0; i<B->mnls[rank]; i++) {
888     for (k=0; k<B->lines->len[i]; k++) {
889       global_col = B->lines->ptrs[i][k];
890       if ((global_col >= first_diag_col) && (global_col <= last_diag_col))
891 	d_nnz[i]++;
892       else
893 	o_nnz[i]++;
894     }
895   }
896 
897   M = N = B->n;
898   /* Here we only know how to create AIJ format */
899   ierr = MatCreate(comm,m,n,M,N,PB);CHKERRQ(ierr);
900   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
901   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
902   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
903 
904   for (i=0; i<B->mnls[rank]; i++) {
905     global_row = B->start_indices[rank]+i;
906     for (k=0; k<B->lines->len[i]; k++) {
907       global_col = B->lines->ptrs[i][k];
908       val = B->lines->A[i][k];
909       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
910     }
911   }
912 
913   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
914   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
915 
916   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
917   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
918 
919   PetscFunctionReturn(0);
920 }
921 
922 /**********************************************************************/
923 
924 /*
925    Converts from an SPAI vector v  to a PETSc vec Pv.
926 */
927 #undef __FUNCT__
928 #define __FUNCT__ "ConvertVectorToVec"
929 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
930 {
931   PetscErrorCode ierr;
932   int size,rank,m,M,i,*mnls,*start_indices,*global_indices;
933 
934   PetscFunctionBegin;
935   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
936   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
937 
938   m = v->mnl;
939   M = v->n;
940 
941 
942   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
943 
944   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
945   ierr = MPI_Allgather((void*)&v->mnl,1,MPI_INT,(void*)mnls,1,MPI_INT,comm);CHKERRQ(ierr);
946 
947   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
948   start_indices[0] = 0;
949   for (i=1; i<size; i++)
950     start_indices[i] = start_indices[i-1] +mnls[i-1];
951 
952   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
953   for (i=0; i<v->mnl; i++)
954     global_indices[i] = start_indices[rank] + i;
955 
956   ierr = PetscFree(mnls);CHKERRQ(ierr);
957   ierr = PetscFree(start_indices);CHKERRQ(ierr);
958 
959   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
960 
961   ierr = PetscFree(global_indices);CHKERRQ(ierr);
962 
963   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
964   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
965 
966   PetscFunctionReturn(0);
967 }
968 
969 
970 
971 
972