xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision a57dd577347cf357bc3142e2987c5ffed089475f)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 
21 #include <petsc-private/pcimpl.h>        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
29 #include <spai.h>
30 #include <matrix.h>
31 EXTERN_C_END
32 
33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36 extern PetscErrorCode MM_to_PETSC(char*,char*,char*);
37 
/* Private context for the SPAI preconditioner */
typedef struct {

  matrix *B;                /* matrix in SPAI format */
  matrix *BT;               /* transpose of matrix in SPAI format (apparently never set in this file — see TODO list at top) */
  matrix *M;                /* the approximate inverse in SPAI format */

  Mat PM;                   /* the approximate inverse in PETSc format */

  double epsilon;           /* tolerance */
  int    nbsteps;           /* max number of "improvement" steps per line */
  int    max;               /* max dimensions of is_I, q, etc. */
  int    maxnew;            /* max number of new entries per step */
  int    block_size;        /* constant block size */
  int    cache_size;        /* one of (1,2,3,4,5,6) indicating size of cache */
  int    verbose;           /* SPAI prints timing and statistics */

  int      sp;              /* symmetric nonzero pattern */
  MPI_Comm comm_spai;     /* communicator to be used with spai */
} PC_SPAI;
57 
58 /**********************************************************************/
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "PCSetUp_SPAI"
62 static PetscErrorCode PCSetUp_SPAI(PC pc)
63 {
64   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
65   PetscErrorCode ierr;
66   Mat            AT;
67 
68   PetscFunctionBegin;
69   init_SPAI();
70 
71   if (ispai->sp) {
72     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
73   } else {
74     /* Use the transpose to get the column nonzero structure. */
75     ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
76     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
77     ierr = MatDestroy(&AT);CHKERRQ(ierr);
78   }
79 
80   /* Destroy the transpose */
81   /* Don't know how to do it. PETSc developers? */
82 
83   /* construct SPAI preconditioner */
84   /* FILE *messages */     /* file for warning messages */
85   /* double epsilon */     /* tolerance */
86   /* int nbsteps */        /* max number of "improvement" steps per line */
87   /* int max */            /* max dimensions of is_I, q, etc. */
88   /* int maxnew */         /* max number of new entries per step */
89   /* int block_size */     /* block_size == 1 specifies scalar elments
90                               block_size == n specifies nxn constant-block elements
91                               block_size == 0 specifies variable-block elements */
92   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
93   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
94                               verbose == 1 prints timing and matrix statistics */
95 
96   ierr = bspai(ispai->B,&ispai->M,
97                stdout,
98                ispai->epsilon,
99                ispai->nbsteps,
100                ispai->max,
101                ispai->maxnew,
102                ispai->block_size,
103                ispai->cache_size,
104                ispai->verbose);CHKERRQ(ierr);
105 
106   ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr);
107 
108   /* free the SPAI matrices */
109   sp_free_matrix(ispai->B);
110   sp_free_matrix(ispai->M);
111   PetscFunctionReturn(0);
112 }
113 
114 /**********************************************************************/
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCApply_SPAI"
118 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
119 {
120   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124   /* Now using PETSc's multiply */
125   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 /**********************************************************************/
130 
131 #undef __FUNCT__
132 #define __FUNCT__ "PCDestroy_SPAI"
133 static PetscErrorCode PCDestroy_SPAI(PC pc)
134 {
135   PetscErrorCode ierr;
136   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
137 
138   PetscFunctionBegin;
139   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
140   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
141   ierr = PetscFree(pc->data);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 
145 /**********************************************************************/
146 
147 #undef __FUNCT__
148 #define __FUNCT__ "PCView_SPAI"
149 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
150 {
151   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
152   PetscErrorCode ierr;
153   PetscBool      iascii;
154 
155   PetscFunctionBegin;
156   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
157   if (iascii) {
158     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
159     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
160     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
161     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 EXTERN_C_BEGIN
172 #undef __FUNCT__
173 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
174 PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
175 {
176   PC_SPAI *ispai = (PC_SPAI*)pc->data;
177 
178   PetscFunctionBegin;
179   ispai->epsilon = epsilon1;
180   PetscFunctionReturn(0);
181 }
182 EXTERN_C_END
183 
184 /**********************************************************************/
185 
186 EXTERN_C_BEGIN
187 #undef __FUNCT__
188 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
189 PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
190 {
191   PC_SPAI *ispai = (PC_SPAI*)pc->data;
192 
193   PetscFunctionBegin;
194   ispai->nbsteps = nbsteps1;
195   PetscFunctionReturn(0);
196 }
197 EXTERN_C_END
198 
199 /**********************************************************************/
200 
201 /* added 1/7/99 g.h. */
202 EXTERN_C_BEGIN
203 #undef __FUNCT__
204 #define __FUNCT__ "PCSPAISetMax_SPAI"
205 PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
206 {
207   PC_SPAI *ispai = (PC_SPAI*)pc->data;
208 
209   PetscFunctionBegin;
210   ispai->max = max1;
211   PetscFunctionReturn(0);
212 }
213 EXTERN_C_END
214 
215 /**********************************************************************/
216 
217 EXTERN_C_BEGIN
218 #undef __FUNCT__
219 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
220 PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
221 {
222   PC_SPAI *ispai = (PC_SPAI*)pc->data;
223 
224   PetscFunctionBegin;
225   ispai->maxnew = maxnew1;
226   PetscFunctionReturn(0);
227 }
228 EXTERN_C_END
229 
230 /**********************************************************************/
231 
232 EXTERN_C_BEGIN
233 #undef __FUNCT__
234 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
235 PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
236 {
237   PC_SPAI *ispai = (PC_SPAI*)pc->data;
238 
239   PetscFunctionBegin;
240   ispai->block_size = block_size1;
241   PetscFunctionReturn(0);
242 }
243 EXTERN_C_END
244 
245 /**********************************************************************/
246 
247 EXTERN_C_BEGIN
248 #undef __FUNCT__
249 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
250 PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
251 {
252   PC_SPAI *ispai = (PC_SPAI*)pc->data;
253 
254   PetscFunctionBegin;
255   ispai->cache_size = cache_size;
256   PetscFunctionReturn(0);
257 }
258 EXTERN_C_END
259 
260 /**********************************************************************/
261 
262 EXTERN_C_BEGIN
263 #undef __FUNCT__
264 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
265 PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
266 {
267   PC_SPAI *ispai = (PC_SPAI*)pc->data;
268 
269   PetscFunctionBegin;
270   ispai->verbose = verbose;
271   PetscFunctionReturn(0);
272 }
273 EXTERN_C_END
274 
275 /**********************************************************************/
276 
277 EXTERN_C_BEGIN
278 #undef __FUNCT__
279 #define __FUNCT__ "PCSPAISetSp_SPAI"
280 PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
281 {
282   PC_SPAI *ispai = (PC_SPAI*)pc->data;
283 
284   PetscFunctionBegin;
285   ispai->sp = sp;
286   PetscFunctionReturn(0);
287 }
288 EXTERN_C_END
289 
290 /* -------------------------------------------------------------------*/
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "PCSPAISetEpsilon"
294 /*@
295   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
296 
297   Input Parameters:
298 + pc - the preconditioner
299 - eps - epsilon (default .4)
300 
  Notes:  Epsilon must be between 0 and 1. It controls the
302                  quality of the approximation of M to the inverse of
303                  A. Higher values of epsilon lead to more work, more
304                  fill, and usually better preconditioners. In many
305                  cases the best choice of epsilon is the one that
306                  divides the total solution time equally between the
307                  preconditioner and the solver.
308 
309   Level: intermediate
310 
311 .seealso: PCSPAI, PCSetType()
312   @*/
PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to PCSPAISetEpsilon_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
321 
322 /**********************************************************************/
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "PCSPAISetNBSteps"
326 /*@
327   PCSPAISetNBSteps - set maximum number of improvement steps per row in
328         the SPAI preconditioner
329 
330   Input Parameters:
331 + pc - the preconditioner
332 - n - number of steps (default 5)
333 
  Notes:  SPAI constructs an approximation to every column of
335                  the exact inverse of A in a series of improvement
336                  steps. The quality of the approximation is determined
337                  by epsilon. If an approximation achieving an accuracy
338                  of epsilon is not obtained after ns steps, SPAI simply
339                  uses the best approximation constructed so far.
340 
341   Level: intermediate
342 
343 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
344 @*/
PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to PCSPAISetNBSteps_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
353 
354 /**********************************************************************/
355 
356 /* added 1/7/99 g.h. */
357 #undef __FUNCT__
358 #define __FUNCT__ "PCSPAISetMax"
359 /*@
360   PCSPAISetMax - set the size of various working buffers in
361         the SPAI preconditioner
362 
363   Input Parameters:
364 + pc - the preconditioner
365 - n - size (default is 5000)
366 
367   Level: intermediate
368 
369 .seealso: PCSPAI, PCSetType()
370 @*/
PetscErrorCode  PCSPAISetMax(PC pc,int max1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to PCSPAISetMax_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
379 
380 /**********************************************************************/
381 
382 #undef __FUNCT__
383 #define __FUNCT__ "PCSPAISetMaxNew"
384 /*@
385   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
386    in SPAI preconditioner
387 
388   Input Parameters:
389 + pc - the preconditioner
390 - n - maximum number (default 5)
391 
392   Level: intermediate
393 
394 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
395 @*/
PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to PCSPAISetMaxNew_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
404 
405 /**********************************************************************/
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "PCSPAISetBlockSize"
409 /*@
410   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
411 
412   Input Parameters:
413 + pc - the preconditioner
414 - n - block size (default 1)
415 
416   Notes: A block
417                  size of 1 treats A as a matrix of scalar elements. A
418                  block size of s > 1 treats A as a matrix of sxs
419                  blocks. A block size of 0 treats A as a matrix with
420                  variable sized blocks, which are determined by
421                  searching for dense square diagonal blocks in A.
422                  This can be very effective for finite-element
423                  matrices.
424 
425                  SPAI will convert A to block form, use a block
426                  version of the preconditioner algorithm, and then
427                  convert the result back to scalar form.
428 
                 In many cases a block-size parameter other than 1
430                  can lead to very significant improvement in
431                  performance.
432 
433 
434   Level: intermediate
435 
436 .seealso: PCSPAI, PCSetType()
437 @*/
PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to PCSPAISetBlockSize_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
446 
447 /**********************************************************************/
448 
449 #undef __FUNCT__
450 #define __FUNCT__ "PCSPAISetCacheSize"
451 /*@
452   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
453 
454   Input Parameters:
455 + pc - the preconditioner
456 - n -  cache size {0,1,2,3,4,5} (default 5)
457 
458   Notes:    SPAI uses a hash table to cache messages and avoid
                 redundant communication. We suggest always using
460                  5. This parameter is irrelevant in the serial
461                  version.
462 
463   Level: intermediate
464 
465 .seealso: PCSPAI, PCSetType()
466 @*/
PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to PCSPAISetCacheSize_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
475 
476 /**********************************************************************/
477 
478 #undef __FUNCT__
479 #define __FUNCT__ "PCSPAISetVerbose"
480 /*@
481   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
482 
483   Input Parameters:
484 + pc - the preconditioner
485 - n - level (default 1)
486 
487   Notes: print parameters, timings and matrix statistics
488 
489   Level: intermediate
490 
491 .seealso: PCSPAI, PCSetType()
492 @*/
PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to PCSPAISetVerbose_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
501 
502 /**********************************************************************/
503 
504 #undef __FUNCT__
505 #define __FUNCT__ "PCSPAISetSp"
506 /*@
507   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
508 
509   Input Parameters:
510 + pc - the preconditioner
511 - n - 0 or 1
512 
513   Notes: If A has a symmetric nonzero pattern use -sp 1 to
514                  improve performance by eliminating some communication
515                  in the parallel version. Even if A does not have a
516                  symmetric nonzero pattern -sp 1 may well lead to good
517                  results, but the code will not follow the published
518                  SPAI algorithm exactly.
519 
520 
521   Level: intermediate
522 
523 .seealso: PCSPAI, PCSetType()
524 @*/
PetscErrorCode  PCSPAISetSp(PC pc,int sp)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* dispatches to PCSPAISetSp_SPAI() if pc is of type PCSPAI; no-op otherwise */
  ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
533 
534 /**********************************************************************/
535 
536 /**********************************************************************/
537 
538 #undef __FUNCT__
539 #define __FUNCT__ "PCSetFromOptions_SPAI"
540 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
541 {
542   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
543   PetscErrorCode ierr;
544   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
545   double         epsilon1;
546   PetscBool      flg;
547 
548   PetscFunctionBegin;
549   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
550   ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
551   if (flg) {
552     ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
553   }
554   ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
555   if (flg) {
556     ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
557   }
558   /* added 1/7/99 g.h. */
559   ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
560   if (flg) {
561     ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
562   }
563   ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
564   if (flg) {
565     ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
566   }
567   ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
568   if (flg) {
569     ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
570   }
571   ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
572   if (flg) {
573     ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
574   }
575   ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
576   if (flg) {
577     ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
578   }
579   ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
580   if (flg) {
581     ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
582   }
583   ierr = PetscOptionsTail();CHKERRQ(ierr);
584   PetscFunctionReturn(0);
585 }
586 
587 /**********************************************************************/
588 
589 /*MC
590    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
591      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
592 
593    Options Database Keys:
594 +  -pc_spai_epsilon <eps> - set tolerance
.  -pc_spai_nbsteps <n> - set nbsteps
.  -pc_spai_max <m> - set max
.  -pc_spai_maxnew <m> - set maxnew
.  -pc_spai_block_size <n> - set block size
.  -pc_spai_cache_size <n> - set cache size
.  -pc_spai_sp <m> - set sp
-  -pc_spai_verbose <n> - verbose output
602 
603    Notes: This only works with AIJ matrices.
604 
605    Level: beginner
606 
607    Concepts: approximate inverse
608 
609 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
610     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
611     PCSPAISetVerbose(), PCSPAISetSp()
612 M*/
613 
614 EXTERN_C_BEGIN
615 #undef __FUNCT__
616 #define __FUNCT__ "PCCreate_SPAI"
617 PetscErrorCode  PCCreate_SPAI(PC pc)
618 {
619   PC_SPAI        *ispai;
620   PetscErrorCode ierr;
621 
622   PetscFunctionBegin;
623   ierr     = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
624   pc->data = ispai;
625 
626   pc->ops->destroy         = PCDestroy_SPAI;
627   pc->ops->apply           = PCApply_SPAI;
628   pc->ops->applyrichardson = 0;
629   pc->ops->setup           = PCSetUp_SPAI;
630   pc->ops->view            = PCView_SPAI;
631   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
632 
633   ispai->epsilon    = .4;
634   ispai->nbsteps    = 5;
635   ispai->max        = 5000;
636   ispai->maxnew     = 5;
637   ispai->block_size = 1;
638   ispai->cache_size = 5;
639   ispai->verbose    = 0;
640 
641   ispai->sp = 1;
642   ierr      = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr);
643 
644   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C","PCSPAISetEpsilon_SPAI",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
645   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C","PCSPAISetNBSteps_SPAI",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
646   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C","PCSPAISetMax_SPAI",PCSPAISetMax_SPAI);CHKERRQ(ierr);
647   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C","PCSPAISetMaxNew_SPAI",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
648   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C","PCSPAISetBlockSize_SPAI",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
649   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C","PCSPAISetCacheSize_SPAI",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
650   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C","PCSPAISetVerbose_SPAI",PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
651   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C","PCSPAISetSp_SPAI",PCSPAISetSp_SPAI);CHKERRQ(ierr);
652   PetscFunctionReturn(0);
653 }
654 EXTERN_C_END
655 
656 /**********************************************************************/
657 
658 /*
659    Converts from a PETSc matrix to an SPAI matrix
660 */
661 #undef __FUNCT__
662 #define __FUNCT__ "ConvertMatToMatrix"
663 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
664 {
665   matrix                  *M;
666   int                     i,j,col;
667   int                     row_indx;
668   int                     len,pe,local_indx,start_indx;
669   int                     *mapping;
670   PetscErrorCode          ierr;
671   const int               *cols;
672   const double            *vals;
673   int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
674   PetscMPIInt             size,rank;
675   struct compressed_lines *rows;
676 
677   PetscFunctionBegin;
678   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
679   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
680   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
681   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
682 
683   /*
684     not sure why a barrier is required. commenting out
685   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
686   */
687 
688   M = new_matrix((SPAI_Comm)comm);
689 
690   M->n              = n;
691   M->bs             = 1;
692   M->max_block_size = 1;
693 
694   M->mnls          = (int*)malloc(sizeof(int)*size);
695   M->start_indices = (int*)malloc(sizeof(int)*size);
696   M->pe            = (int*)malloc(sizeof(int)*n);
697   M->block_sizes   = (int*)malloc(sizeof(int)*n);
698   for (i=0; i<n; i++) M->block_sizes[i] = 1;
699 
700   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
701 
702   M->start_indices[0] = 0;
703   for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
704 
705   M->mnl            = M->mnls[M->myid];
706   M->my_start_index = M->start_indices[M->myid];
707 
708   for (i=0; i<size; i++) {
709     start_indx = M->start_indices[i];
710     for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i;
711   }
712 
713   if (AT) {
714     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
715   } else {
716     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
717   }
718 
719   rows = M->lines;
720 
721   /* Determine the mapping from global indices to pointers */
722   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
723   pe         = 0;
724   local_indx = 0;
725   for (i=0; i<M->n; i++) {
726     if (local_indx >= M->mnls[pe]) {
727       pe++;
728       local_indx = 0;
729     }
730     mapping[i] = local_indx + M->start_indices[pe];
731     local_indx++;
732   }
733 
734 
735   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
736 
737   /*********************************************************/
738   /************** Set up the row structure *****************/
739   /*********************************************************/
740 
741   /* count number of nonzeros in every row */
742   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
743   for (i=rstart; i<rend; i++) {
744     ierr = MatGetRow(A,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
745     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
746   }
747 
748   /* allocate buffers */
749   len = 0;
750   for (i=0; i<mnl; i++) {
751     if (len < num_ptr[i]) len = num_ptr[i];
752   }
753 
754   for (i=rstart; i<rend; i++) {
755     row_indx             = i-rstart;
756     len                  = num_ptr[row_indx];
757     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
758     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
759   }
760 
761   /* copy the matrix */
762   for (i=rstart; i<rend; i++) {
763     row_indx = i - rstart;
764     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
765     for (j=0; j<nz; j++) {
766       col = cols[j];
767       len = rows->len[row_indx]++;
768 
769       rows->ptrs[row_indx][len] = mapping[col];
770       rows->A[row_indx][len]    = vals[j];
771     }
772     rows->slen[row_indx] = rows->len[row_indx];
773 
774     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
775   }
776 
777 
778   /************************************************************/
779   /************** Set up the column structure *****************/
780   /*********************************************************/
781 
782   if (AT) {
783 
784     /* count number of nonzeros in every column */
785     for (i=rstart; i<rend; i++) {
786       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
787       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
788     }
789 
790     /* allocate buffers */
791     len = 0;
792     for (i=0; i<mnl; i++) {
793       if (len < num_ptr[i]) len = num_ptr[i];
794     }
795 
796     for (i=rstart; i<rend; i++) {
797       row_indx = i-rstart;
798       len      = num_ptr[row_indx];
799 
800       rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
801     }
802 
803     /* copy the matrix (i.e., the structure) */
804     for (i=rstart; i<rend; i++) {
805       row_indx = i - rstart;
806       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
807       for (j=0; j<nz; j++) {
808         col = cols[j];
809         len = rows->rlen[row_indx]++;
810 
811         rows->rptrs[row_indx][len] = mapping[col];
812       }
813       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
814     }
815   }
816 
817   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
818   ierr = PetscFree(mapping);CHKERRQ(ierr);
819 
820   order_pointers(M);
821   M->maxnz = calc_maxnz(M);
822   *B       = M;
823   PetscFunctionReturn(0);
824 }
825 
826 /**********************************************************************/
827 
828 /*
829    Converts from an SPAI matrix B  to a PETSc matrix PB.
   This assumes that the SPAI matrix B is stored in
831    COMPRESSED-ROW format.
832 */
833 #undef __FUNCT__
834 #define __FUNCT__ "ConvertMatrixToMat"
835 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
836 {
837   PetscMPIInt    size,rank;
838   PetscErrorCode ierr;
839   int            m,n,M,N;
840   int            d_nz,o_nz;
841   int            *d_nnz,*o_nnz;
842   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
843   PetscScalar    val;
844 
845   PetscFunctionBegin;
846   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
847   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
848 
849   m    = n = B->mnls[rank];
850   d_nz = o_nz = 0;
851 
852   /* Determine preallocation for MatCreateMPIAIJ */
853   ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
854   ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
855   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
856   first_diag_col = B->start_indices[rank];
857   last_diag_col  = first_diag_col + B->mnls[rank];
858   for (i=0; i<B->mnls[rank]; i++) {
859     for (k=0; k<B->lines->len[i]; k++) {
860       global_col = B->lines->ptrs[i][k];
861       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
862       else o_nnz[i]++;
863     }
864   }
865 
866   M = N = B->n;
867   /* Here we only know how to create AIJ format */
868   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
869   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
870   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
871   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
872   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
873 
874   for (i=0; i<B->mnls[rank]; i++) {
875     global_row = B->start_indices[rank]+i;
876     for (k=0; k<B->lines->len[i]; k++) {
877       global_col = B->lines->ptrs[i][k];
878 
879       val  = B->lines->A[i][k];
880       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
881     }
882   }
883 
884   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
885   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
886 
887   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
888   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
889   PetscFunctionReturn(0);
890 }
891 
892 /**********************************************************************/
893 
894 /*
895    Converts from an SPAI vector v  to a PETSc vec Pv.
896 */
897 #undef __FUNCT__
898 #define __FUNCT__ "ConvertVectorToVec"
899 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
900 {
901   PetscErrorCode ierr;
902   PetscMPIInt    size,rank;
903   int            m,M,i,*mnls,*start_indices,*global_indices;
904 
905   PetscFunctionBegin;
906   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
907   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
908 
909   m = v->mnl;
910   M = v->n;
911 
912 
913   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
914 
915   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
916   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
917 
918   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
919 
920   start_indices[0] = 0;
921   for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];
922 
923   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
924   for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;
925 
926   ierr = PetscFree(mnls);CHKERRQ(ierr);
927   ierr = PetscFree(start_indices);CHKERRQ(ierr);
928 
929   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
930   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
931   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
932 
933   ierr = PetscFree(global_indices);CHKERRQ(ierr);
934   PetscFunctionReturn(0);
935 }
936 
937 
938 
939 
940