xref: /petsc/src/ksp/pc/impls/spai/ispai.c (revision f7a087814f99396ec53a0bad244374eaa0312225)
1 
2 /*
3    3/99 Modified by Stephen Barnard to support SPAI version 3.0
4 */
5 
6 /*
7       Provides an interface to the SPAI Sparse Approximate Inverse Preconditioner
8    Code written by Stephen Barnard.
9 
10       Note: there is some BAD memory bleeding below!
11 
12       This code needs work
13 
14    1) get rid of all memory bleeding
15    2) fix PETSc/interface so that it gets if the matrix is symmetric from the matrix
16       rather than having the sp flag for PC_SPAI
17    3) fix to set the block size based on the matrix block size
18 
19 */
20 
21 #include <petsc-private/pcimpl.h>        /*I "petscpc.h" I*/
22 #include "petscspai.h"
23 
24 /*
25     These are the SPAI include files
26 */
27 EXTERN_C_BEGIN
28 #define MPI /* required for setting SPAI_Comm correctly in basics.h */
29 #include <spai.h>
30 #include <matrix.h>
31 EXTERN_C_END
32 
33 extern PetscErrorCode ConvertMatToMatrix(MPI_Comm,Mat,Mat,matrix**);
34 extern PetscErrorCode ConvertMatrixToMat(MPI_Comm,matrix*,Mat*);
35 extern PetscErrorCode ConvertVectorToVec(MPI_Comm,vector *v,Vec *Pv);
36 extern PetscErrorCode MM_to_PETSC(char*,char*,char*);
37 
38 typedef struct {
39 
40   matrix *B;                /* matrix in SPAI format */
41   matrix *BT;               /* transpose of matrix in SPAI format */
42   matrix *M;                /* the approximate inverse in SPAI format */
43 
44   Mat PM;                   /* the approximate inverse PETSc format */
45 
46   double epsilon;           /* tolerance */
47   int    nbsteps;           /* max number of "improvement" steps per line */
48   int    max;               /* max dimensions of is_I, q, etc. */
49   int    maxnew;            /* max number of new entries per step */
50   int    block_size;        /* constant block size */
51   int    cache_size;        /* one of (1,2,3,4,5,6) indicting size of cache */
52   int    verbose;           /* SPAI prints timing and statistics */
53 
54   int      sp;              /* symmetric nonzero pattern */
55   MPI_Comm comm_spai;     /* communicator to be used with spai */
56 } PC_SPAI;
57 
58 /**********************************************************************/
59 
60 #undef __FUNCT__
61 #define __FUNCT__ "PCSetUp_SPAI"
62 static PetscErrorCode PCSetUp_SPAI(PC pc)
63 {
64   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
65   PetscErrorCode ierr;
66   Mat            AT;
67 
68   PetscFunctionBegin;
69   init_SPAI();
70 
71   if (ispai->sp) {
72     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,pc->pmat,&ispai->B);CHKERRQ(ierr);
73   } else {
74     /* Use the transpose to get the column nonzero structure. */
75     ierr = MatTranspose(pc->pmat,MAT_INITIAL_MATRIX,&AT);CHKERRQ(ierr);
76     ierr = ConvertMatToMatrix(ispai->comm_spai,pc->pmat,AT,&ispai->B);CHKERRQ(ierr);
77     ierr = MatDestroy(&AT);CHKERRQ(ierr);
78   }
79 
80   /* Destroy the transpose */
81   /* Don't know how to do it. PETSc developers? */
82 
83   /* construct SPAI preconditioner */
84   /* FILE *messages */     /* file for warning messages */
85   /* double epsilon */     /* tolerance */
86   /* int nbsteps */        /* max number of "improvement" steps per line */
87   /* int max */            /* max dimensions of is_I, q, etc. */
88   /* int maxnew */         /* max number of new entries per step */
89   /* int block_size */     /* block_size == 1 specifies scalar elments
90                               block_size == n specifies nxn constant-block elements
91                               block_size == 0 specifies variable-block elements */
92   /* int cache_size */     /* one of (1,2,3,4,5,6) indicting size of cache. cache_size == 0 indicates no caching */
93   /* int    verbose    */  /* verbose == 0 specifies that SPAI is silent
94                               verbose == 1 prints timing and matrix statistics */
95 
96   ierr = bspai(ispai->B,&ispai->M,
97                stdout,
98                ispai->epsilon,
99                ispai->nbsteps,
100                ispai->max,
101                ispai->maxnew,
102                ispai->block_size,
103                ispai->cache_size,
104                ispai->verbose);CHKERRQ(ierr);
105 
106   ierr = ConvertMatrixToMat(PetscObjectComm((PetscObject)pc),ispai->M,&ispai->PM);CHKERRQ(ierr);
107 
108   /* free the SPAI matrices */
109   sp_free_matrix(ispai->B);
110   sp_free_matrix(ispai->M);
111   PetscFunctionReturn(0);
112 }
113 
114 /**********************************************************************/
115 
116 #undef __FUNCT__
117 #define __FUNCT__ "PCApply_SPAI"
118 static PetscErrorCode PCApply_SPAI(PC pc,Vec xx,Vec y)
119 {
120   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
121   PetscErrorCode ierr;
122 
123   PetscFunctionBegin;
124   /* Now using PETSc's multiply */
125   ierr = MatMult(ispai->PM,xx,y);CHKERRQ(ierr);
126   PetscFunctionReturn(0);
127 }
128 
129 /**********************************************************************/
130 
131 #undef __FUNCT__
132 #define __FUNCT__ "PCDestroy_SPAI"
133 static PetscErrorCode PCDestroy_SPAI(PC pc)
134 {
135   PetscErrorCode ierr;
136   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
137 
138   PetscFunctionBegin;
139   ierr = MatDestroy(&ispai->PM);CHKERRQ(ierr);
140   ierr = MPI_Comm_free(&(ispai->comm_spai));CHKERRQ(ierr);
141   ierr = PetscFree(pc->data);CHKERRQ(ierr);
142   PetscFunctionReturn(0);
143 }
144 
145 /**********************************************************************/
146 
147 #undef __FUNCT__
148 #define __FUNCT__ "PCView_SPAI"
149 static PetscErrorCode PCView_SPAI(PC pc,PetscViewer viewer)
150 {
151   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
152   PetscErrorCode ierr;
153   PetscBool      iascii;
154 
155   PetscFunctionBegin;
156   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
157   if (iascii) {
158     ierr = PetscViewerASCIIPrintf(viewer,"    SPAI preconditioner\n");CHKERRQ(ierr);
159     ierr = PetscViewerASCIIPrintf(viewer,"    epsilon %G\n",   ispai->epsilon);CHKERRQ(ierr);
160     ierr = PetscViewerASCIIPrintf(viewer,"    nbsteps %d\n",   ispai->nbsteps);CHKERRQ(ierr);
161     ierr = PetscViewerASCIIPrintf(viewer,"    max %d\n",       ispai->max);CHKERRQ(ierr);
162     ierr = PetscViewerASCIIPrintf(viewer,"    maxnew %d\n",    ispai->maxnew);CHKERRQ(ierr);
163     ierr = PetscViewerASCIIPrintf(viewer,"    block_size %d\n",ispai->block_size);CHKERRQ(ierr);
164     ierr = PetscViewerASCIIPrintf(viewer,"    cache_size %d\n",ispai->cache_size);CHKERRQ(ierr);
165     ierr = PetscViewerASCIIPrintf(viewer,"    verbose %d\n",   ispai->verbose);CHKERRQ(ierr);
166     ierr = PetscViewerASCIIPrintf(viewer,"    sp %d\n",        ispai->sp);CHKERRQ(ierr);
167   }
168   PetscFunctionReturn(0);
169 }
170 
171 #undef __FUNCT__
172 #define __FUNCT__ "PCSPAISetEpsilon_SPAI"
173 static PetscErrorCode  PCSPAISetEpsilon_SPAI(PC pc,double epsilon1)
174 {
175   PC_SPAI *ispai = (PC_SPAI*)pc->data;
176 
177   PetscFunctionBegin;
178   ispai->epsilon = epsilon1;
179   PetscFunctionReturn(0);
180 }
181 
182 /**********************************************************************/
183 
184 #undef __FUNCT__
185 #define __FUNCT__ "PCSPAISetNBSteps_SPAI"
186 static PetscErrorCode  PCSPAISetNBSteps_SPAI(PC pc,int nbsteps1)
187 {
188   PC_SPAI *ispai = (PC_SPAI*)pc->data;
189 
190   PetscFunctionBegin;
191   ispai->nbsteps = nbsteps1;
192   PetscFunctionReturn(0);
193 }
194 
195 /**********************************************************************/
196 
197 /* added 1/7/99 g.h. */
198 #undef __FUNCT__
199 #define __FUNCT__ "PCSPAISetMax_SPAI"
200 static PetscErrorCode  PCSPAISetMax_SPAI(PC pc,int max1)
201 {
202   PC_SPAI *ispai = (PC_SPAI*)pc->data;
203 
204   PetscFunctionBegin;
205   ispai->max = max1;
206   PetscFunctionReturn(0);
207 }
208 
209 /**********************************************************************/
210 
211 #undef __FUNCT__
212 #define __FUNCT__ "PCSPAISetMaxNew_SPAI"
213 static PetscErrorCode  PCSPAISetMaxNew_SPAI(PC pc,int maxnew1)
214 {
215   PC_SPAI *ispai = (PC_SPAI*)pc->data;
216 
217   PetscFunctionBegin;
218   ispai->maxnew = maxnew1;
219   PetscFunctionReturn(0);
220 }
221 
222 /**********************************************************************/
223 
224 #undef __FUNCT__
225 #define __FUNCT__ "PCSPAISetBlockSize_SPAI"
226 static PetscErrorCode  PCSPAISetBlockSize_SPAI(PC pc,int block_size1)
227 {
228   PC_SPAI *ispai = (PC_SPAI*)pc->data;
229 
230   PetscFunctionBegin;
231   ispai->block_size = block_size1;
232   PetscFunctionReturn(0);
233 }
234 
235 /**********************************************************************/
236 
237 #undef __FUNCT__
238 #define __FUNCT__ "PCSPAISetCacheSize_SPAI"
239 static PetscErrorCode  PCSPAISetCacheSize_SPAI(PC pc,int cache_size)
240 {
241   PC_SPAI *ispai = (PC_SPAI*)pc->data;
242 
243   PetscFunctionBegin;
244   ispai->cache_size = cache_size;
245   PetscFunctionReturn(0);
246 }
247 
248 /**********************************************************************/
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "PCSPAISetVerbose_SPAI"
252 static PetscErrorCode  PCSPAISetVerbose_SPAI(PC pc,int verbose)
253 {
254   PC_SPAI *ispai = (PC_SPAI*)pc->data;
255 
256   PetscFunctionBegin;
257   ispai->verbose = verbose;
258   PetscFunctionReturn(0);
259 }
260 
261 /**********************************************************************/
262 
263 #undef __FUNCT__
264 #define __FUNCT__ "PCSPAISetSp_SPAI"
265 static PetscErrorCode  PCSPAISetSp_SPAI(PC pc,int sp)
266 {
267   PC_SPAI *ispai = (PC_SPAI*)pc->data;
268 
269   PetscFunctionBegin;
270   ispai->sp = sp;
271   PetscFunctionReturn(0);
272 }
273 
274 /* -------------------------------------------------------------------*/
275 
276 #undef __FUNCT__
277 #define __FUNCT__ "PCSPAISetEpsilon"
278 /*@
279   PCSPAISetEpsilon -- Set the tolerance for the SPAI preconditioner
280 
281   Input Parameters:
282 + pc - the preconditioner
283 - eps - epsilon (default .4)
284 
285   Notes:  Espilon must be between 0 and 1. It controls the
286                  quality of the approximation of M to the inverse of
287                  A. Higher values of epsilon lead to more work, more
288                  fill, and usually better preconditioners. In many
289                  cases the best choice of epsilon is the one that
290                  divides the total solution time equally between the
291                  preconditioner and the solver.
292 
293   Level: intermediate
294 
295 .seealso: PCSPAI, PCSetType()
296   @*/
297 PetscErrorCode  PCSPAISetEpsilon(PC pc,double epsilon1)
298 {
299   PetscErrorCode ierr;
300 
301   PetscFunctionBegin;
302   ierr = PetscTryMethod(pc,"PCSPAISetEpsilon_C",(PC,double),(pc,epsilon1));CHKERRQ(ierr);
303   PetscFunctionReturn(0);
304 }
305 
306 /**********************************************************************/
307 
308 #undef __FUNCT__
309 #define __FUNCT__ "PCSPAISetNBSteps"
310 /*@
311   PCSPAISetNBSteps - set maximum number of improvement steps per row in
312         the SPAI preconditioner
313 
314   Input Parameters:
315 + pc - the preconditioner
316 - n - number of steps (default 5)
317 
318   Notes:  SPAI constructs to approximation to every column of
319                  the exact inverse of A in a series of improvement
320                  steps. The quality of the approximation is determined
321                  by epsilon. If an approximation achieving an accuracy
322                  of epsilon is not obtained after ns steps, SPAI simply
323                  uses the best approximation constructed so far.
324 
325   Level: intermediate
326 
327 .seealso: PCSPAI, PCSetType(), PCSPAISetMaxNew()
328 @*/
329 PetscErrorCode  PCSPAISetNBSteps(PC pc,int nbsteps1)
330 {
331   PetscErrorCode ierr;
332 
333   PetscFunctionBegin;
334   ierr = PetscTryMethod(pc,"PCSPAISetNBSteps_C",(PC,int),(pc,nbsteps1));CHKERRQ(ierr);
335   PetscFunctionReturn(0);
336 }
337 
338 /**********************************************************************/
339 
340 /* added 1/7/99 g.h. */
341 #undef __FUNCT__
342 #define __FUNCT__ "PCSPAISetMax"
343 /*@
344   PCSPAISetMax - set the size of various working buffers in
345         the SPAI preconditioner
346 
347   Input Parameters:
348 + pc - the preconditioner
349 - n - size (default is 5000)
350 
351   Level: intermediate
352 
353 .seealso: PCSPAI, PCSetType()
354 @*/
355 PetscErrorCode  PCSPAISetMax(PC pc,int max1)
356 {
357   PetscErrorCode ierr;
358 
359   PetscFunctionBegin;
360   ierr = PetscTryMethod(pc,"PCSPAISetMax_C",(PC,int),(pc,max1));CHKERRQ(ierr);
361   PetscFunctionReturn(0);
362 }
363 
364 /**********************************************************************/
365 
366 #undef __FUNCT__
367 #define __FUNCT__ "PCSPAISetMaxNew"
368 /*@
369   PCSPAISetMaxNew - set maximum number of new nonzero candidates per step
370    in SPAI preconditioner
371 
372   Input Parameters:
373 + pc - the preconditioner
374 - n - maximum number (default 5)
375 
376   Level: intermediate
377 
378 .seealso: PCSPAI, PCSetType(), PCSPAISetNBSteps()
379 @*/
380 PetscErrorCode  PCSPAISetMaxNew(PC pc,int maxnew1)
381 {
382   PetscErrorCode ierr;
383 
384   PetscFunctionBegin;
385   ierr = PetscTryMethod(pc,"PCSPAISetMaxNew_C",(PC,int),(pc,maxnew1));CHKERRQ(ierr);
386   PetscFunctionReturn(0);
387 }
388 
389 /**********************************************************************/
390 
391 #undef __FUNCT__
392 #define __FUNCT__ "PCSPAISetBlockSize"
393 /*@
394   PCSPAISetBlockSize - set the block size for the SPAI preconditioner
395 
396   Input Parameters:
397 + pc - the preconditioner
398 - n - block size (default 1)
399 
400   Notes: A block
401                  size of 1 treats A as a matrix of scalar elements. A
402                  block size of s > 1 treats A as a matrix of sxs
403                  blocks. A block size of 0 treats A as a matrix with
404                  variable sized blocks, which are determined by
405                  searching for dense square diagonal blocks in A.
406                  This can be very effective for finite-element
407                  matrices.
408 
409                  SPAI will convert A to block form, use a block
410                  version of the preconditioner algorithm, and then
411                  convert the result back to scalar form.
412 
413                  In many cases the a block-size parameter other than 1
414                  can lead to very significant improvement in
415                  performance.
416 
417 
418   Level: intermediate
419 
420 .seealso: PCSPAI, PCSetType()
421 @*/
422 PetscErrorCode  PCSPAISetBlockSize(PC pc,int block_size1)
423 {
424   PetscErrorCode ierr;
425 
426   PetscFunctionBegin;
427   ierr = PetscTryMethod(pc,"PCSPAISetBlockSize_C",(PC,int),(pc,block_size1));CHKERRQ(ierr);
428   PetscFunctionReturn(0);
429 }
430 
431 /**********************************************************************/
432 
433 #undef __FUNCT__
434 #define __FUNCT__ "PCSPAISetCacheSize"
435 /*@
436   PCSPAISetCacheSize - specify cache size in the SPAI preconditioner
437 
438   Input Parameters:
439 + pc - the preconditioner
440 - n -  cache size {0,1,2,3,4,5} (default 5)
441 
442   Notes:    SPAI uses a hash table to cache messages and avoid
443                  redundant communication. If suggest always using
444                  5. This parameter is irrelevant in the serial
445                  version.
446 
447   Level: intermediate
448 
449 .seealso: PCSPAI, PCSetType()
450 @*/
451 PetscErrorCode  PCSPAISetCacheSize(PC pc,int cache_size)
452 {
453   PetscErrorCode ierr;
454 
455   PetscFunctionBegin;
456   ierr = PetscTryMethod(pc,"PCSPAISetCacheSize_C",(PC,int),(pc,cache_size));CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 /**********************************************************************/
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "PCSPAISetVerbose"
464 /*@
465   PCSPAISetVerbose - verbosity level for the SPAI preconditioner
466 
467   Input Parameters:
468 + pc - the preconditioner
469 - n - level (default 1)
470 
471   Notes: print parameters, timings and matrix statistics
472 
473   Level: intermediate
474 
475 .seealso: PCSPAI, PCSetType()
476 @*/
477 PetscErrorCode  PCSPAISetVerbose(PC pc,int verbose)
478 {
479   PetscErrorCode ierr;
480 
481   PetscFunctionBegin;
482   ierr = PetscTryMethod(pc,"PCSPAISetVerbose_C",(PC,int),(pc,verbose));CHKERRQ(ierr);
483   PetscFunctionReturn(0);
484 }
485 
486 /**********************************************************************/
487 
488 #undef __FUNCT__
489 #define __FUNCT__ "PCSPAISetSp"
490 /*@
491   PCSPAISetSp - specify a symmetric matrix sparsity pattern in the SPAI preconditioner
492 
493   Input Parameters:
494 + pc - the preconditioner
495 - n - 0 or 1
496 
497   Notes: If A has a symmetric nonzero pattern use -sp 1 to
498                  improve performance by eliminating some communication
499                  in the parallel version. Even if A does not have a
500                  symmetric nonzero pattern -sp 1 may well lead to good
501                  results, but the code will not follow the published
502                  SPAI algorithm exactly.
503 
504 
505   Level: intermediate
506 
507 .seealso: PCSPAI, PCSetType()
508 @*/
509 PetscErrorCode  PCSPAISetSp(PC pc,int sp)
510 {
511   PetscErrorCode ierr;
512 
513   PetscFunctionBegin;
514   ierr = PetscTryMethod(pc,"PCSPAISetSp_C",(PC,int),(pc,sp));CHKERRQ(ierr);
515   PetscFunctionReturn(0);
516 }
517 
518 /**********************************************************************/
519 
520 /**********************************************************************/
521 
522 #undef __FUNCT__
523 #define __FUNCT__ "PCSetFromOptions_SPAI"
524 static PetscErrorCode PCSetFromOptions_SPAI(PC pc)
525 {
526   PC_SPAI        *ispai = (PC_SPAI*)pc->data;
527   PetscErrorCode ierr;
528   int            nbsteps1,max1,maxnew1,block_size1,cache_size,verbose,sp;
529   double         epsilon1;
530   PetscBool      flg;
531 
532   PetscFunctionBegin;
533   ierr = PetscOptionsHead("SPAI options");CHKERRQ(ierr);
534   ierr = PetscOptionsReal("-pc_spai_epsilon","","PCSPAISetEpsilon",ispai->epsilon,&epsilon1,&flg);CHKERRQ(ierr);
535   if (flg) {
536     ierr = PCSPAISetEpsilon(pc,epsilon1);CHKERRQ(ierr);
537   }
538   ierr = PetscOptionsInt("-pc_spai_nbsteps","","PCSPAISetNBSteps",ispai->nbsteps,&nbsteps1,&flg);CHKERRQ(ierr);
539   if (flg) {
540     ierr = PCSPAISetNBSteps(pc,nbsteps1);CHKERRQ(ierr);
541   }
542   /* added 1/7/99 g.h. */
543   ierr = PetscOptionsInt("-pc_spai_max","","PCSPAISetMax",ispai->max,&max1,&flg);CHKERRQ(ierr);
544   if (flg) {
545     ierr = PCSPAISetMax(pc,max1);CHKERRQ(ierr);
546   }
547   ierr = PetscOptionsInt("-pc_spai_maxnew","","PCSPAISetMaxNew",ispai->maxnew,&maxnew1,&flg);CHKERRQ(ierr);
548   if (flg) {
549     ierr = PCSPAISetMaxNew(pc,maxnew1);CHKERRQ(ierr);
550   }
551   ierr = PetscOptionsInt("-pc_spai_block_size","","PCSPAISetBlockSize",ispai->block_size,&block_size1,&flg);CHKERRQ(ierr);
552   if (flg) {
553     ierr = PCSPAISetBlockSize(pc,block_size1);CHKERRQ(ierr);
554   }
555   ierr = PetscOptionsInt("-pc_spai_cache_size","","PCSPAISetCacheSize",ispai->cache_size,&cache_size,&flg);CHKERRQ(ierr);
556   if (flg) {
557     ierr = PCSPAISetCacheSize(pc,cache_size);CHKERRQ(ierr);
558   }
559   ierr = PetscOptionsInt("-pc_spai_verbose","","PCSPAISetVerbose",ispai->verbose,&verbose,&flg);CHKERRQ(ierr);
560   if (flg) {
561     ierr = PCSPAISetVerbose(pc,verbose);CHKERRQ(ierr);
562   }
563   ierr = PetscOptionsInt("-pc_spai_sp","","PCSPAISetSp",ispai->sp,&sp,&flg);CHKERRQ(ierr);
564   if (flg) {
565     ierr = PCSPAISetSp(pc,sp);CHKERRQ(ierr);
566   }
567   ierr = PetscOptionsTail();CHKERRQ(ierr);
568   PetscFunctionReturn(0);
569 }
570 
571 /**********************************************************************/
572 
573 /*MC
574    PCSPAI - Use the Sparse Approximate Inverse method of Grote and Barnard
575      as a preconditioner (SIAM J. Sci. Comput.; vol 18, nr 3)
576 
577    Options Database Keys:
578 +  -pc_spai_epsilon <eps> - set tolerance
579 .  -pc_spai_nbstep <n> - set nbsteps
580 .  -pc_spai_max <m> - set max
581 .  -pc_spai_max_new <m> - set maxnew
582 .  -pc_spai_block_size <n> - set block size
583 .  -pc_spai_cache_size <n> - set cache size
584 .  -pc_spai_sp <m> - set sp
585 -  -pc_spai_set_verbose <true,false> - verbose output
586 
587    Notes: This only works with AIJ matrices.
588 
589    Level: beginner
590 
591    Concepts: approximate inverse
592 
593 .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC,
594     PCSPAISetEpsilon(), PCSPAISetMax(), PCSPAISetMaxNew(), PCSPAISetBlockSize(),
595     PCSPAISetVerbose(), PCSPAISetSp()
596 M*/
597 
598 EXTERN_C_BEGIN
599 #undef __FUNCT__
600 #define __FUNCT__ "PCCreate_SPAI"
601 PetscErrorCode  PCCreate_SPAI(PC pc)
602 {
603   PC_SPAI        *ispai;
604   PetscErrorCode ierr;
605 
606   PetscFunctionBegin;
607   ierr     = PetscNewLog(pc,PC_SPAI,&ispai);CHKERRQ(ierr);
608   pc->data = ispai;
609 
610   pc->ops->destroy         = PCDestroy_SPAI;
611   pc->ops->apply           = PCApply_SPAI;
612   pc->ops->applyrichardson = 0;
613   pc->ops->setup           = PCSetUp_SPAI;
614   pc->ops->view            = PCView_SPAI;
615   pc->ops->setfromoptions  = PCSetFromOptions_SPAI;
616 
617   ispai->epsilon    = .4;
618   ispai->nbsteps    = 5;
619   ispai->max        = 5000;
620   ispai->maxnew     = 5;
621   ispai->block_size = 1;
622   ispai->cache_size = 5;
623   ispai->verbose    = 0;
624 
625   ispai->sp = 1;
626   ierr      = MPI_Comm_dup(PetscObjectComm((PetscObject)pc),&(ispai->comm_spai));CHKERRQ(ierr);
627 
628   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetEpsilon_C","PCSPAISetEpsilon_SPAI",PCSPAISetEpsilon_SPAI);CHKERRQ(ierr);
629   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetNBSteps_C","PCSPAISetNBSteps_SPAI",PCSPAISetNBSteps_SPAI);CHKERRQ(ierr);
630   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMax_C","PCSPAISetMax_SPAI",PCSPAISetMax_SPAI);CHKERRQ(ierr);
631   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetMaxNew_C","PCSPAISetMaxNew_SPAI",PCSPAISetMaxNew_SPAI);CHKERRQ(ierr);
632   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetBlockSize_C","PCSPAISetBlockSize_SPAI",PCSPAISetBlockSize_SPAI);CHKERRQ(ierr);
633   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetCacheSize_C","PCSPAISetCacheSize_SPAI",PCSPAISetCacheSize_SPAI);CHKERRQ(ierr);
634   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetVerbose_C","PCSPAISetVerbose_SPAI",PCSPAISetVerbose_SPAI);CHKERRQ(ierr);
635   ierr = PetscObjectComposeFunction((PetscObject)pc,"PCSPAISetSp_C","PCSPAISetSp_SPAI",PCSPAISetSp_SPAI);CHKERRQ(ierr);
636   PetscFunctionReturn(0);
637 }
638 EXTERN_C_END
639 
640 /**********************************************************************/
641 
642 /*
643    Converts from a PETSc matrix to an SPAI matrix
644 */
645 #undef __FUNCT__
646 #define __FUNCT__ "ConvertMatToMatrix"
647 PetscErrorCode ConvertMatToMatrix(MPI_Comm comm, Mat A,Mat AT,matrix **B)
648 {
649   matrix                  *M;
650   int                     i,j,col;
651   int                     row_indx;
652   int                     len,pe,local_indx,start_indx;
653   int                     *mapping;
654   PetscErrorCode          ierr;
655   const int               *cols;
656   const double            *vals;
657   int                     *num_ptr,n,mnl,nnl,nz,rstart,rend;
658   PetscMPIInt             size,rank;
659   struct compressed_lines *rows;
660 
661   PetscFunctionBegin;
662   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
663   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
664   ierr = MatGetSize(A,&n,&n);CHKERRQ(ierr);
665   ierr = MatGetLocalSize(A,&mnl,&nnl);CHKERRQ(ierr);
666 
667   /*
668     not sure why a barrier is required. commenting out
669   ierr = MPI_Barrier(comm);CHKERRQ(ierr);
670   */
671 
672   M = new_matrix((SPAI_Comm)comm);
673 
674   M->n              = n;
675   M->bs             = 1;
676   M->max_block_size = 1;
677 
678   M->mnls          = (int*)malloc(sizeof(int)*size);
679   M->start_indices = (int*)malloc(sizeof(int)*size);
680   M->pe            = (int*)malloc(sizeof(int)*n);
681   M->block_sizes   = (int*)malloc(sizeof(int)*n);
682   for (i=0; i<n; i++) M->block_sizes[i] = 1;
683 
684   ierr = MPI_Allgather(&mnl,1,MPI_INT,M->mnls,1,MPI_INT,comm);CHKERRQ(ierr);
685 
686   M->start_indices[0] = 0;
687   for (i=1; i<size; i++) M->start_indices[i] = M->start_indices[i-1] + M->mnls[i-1];
688 
689   M->mnl            = M->mnls[M->myid];
690   M->my_start_index = M->start_indices[M->myid];
691 
692   for (i=0; i<size; i++) {
693     start_indx = M->start_indices[i];
694     for (j=0; j<M->mnls[i]; j++) M->pe[start_indx+j] = i;
695   }
696 
697   if (AT) {
698     M->lines = new_compressed_lines(M->mnls[rank],1);CHKERRQ(ierr);
699   } else {
700     M->lines = new_compressed_lines(M->mnls[rank],0);CHKERRQ(ierr);
701   }
702 
703   rows = M->lines;
704 
705   /* Determine the mapping from global indices to pointers */
706   ierr       = PetscMalloc(M->n*sizeof(int),&mapping);CHKERRQ(ierr);
707   pe         = 0;
708   local_indx = 0;
709   for (i=0; i<M->n; i++) {
710     if (local_indx >= M->mnls[pe]) {
711       pe++;
712       local_indx = 0;
713     }
714     mapping[i] = local_indx + M->start_indices[pe];
715     local_indx++;
716   }
717 
718 
719   ierr = PetscMalloc(mnl*sizeof(int),&num_ptr);CHKERRQ(ierr);
720 
721   /*********************************************************/
722   /************** Set up the row structure *****************/
723   /*********************************************************/
724 
725   /* count number of nonzeros in every row */
726   ierr = MatGetOwnershipRange(A,&rstart,&rend);CHKERRQ(ierr);
727   for (i=rstart; i<rend; i++) {
728     ierr = MatGetRow(A,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
729     ierr = MatRestoreRow(A,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
730   }
731 
732   /* allocate buffers */
733   len = 0;
734   for (i=0; i<mnl; i++) {
735     if (len < num_ptr[i]) len = num_ptr[i];
736   }
737 
738   for (i=rstart; i<rend; i++) {
739     row_indx             = i-rstart;
740     len                  = num_ptr[row_indx];
741     rows->ptrs[row_indx] = (int*)malloc(len*sizeof(int));
742     rows->A[row_indx]    = (double*)malloc(len*sizeof(double));
743   }
744 
745   /* copy the matrix */
746   for (i=rstart; i<rend; i++) {
747     row_indx = i - rstart;
748     ierr     = MatGetRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
749     for (j=0; j<nz; j++) {
750       col = cols[j];
751       len = rows->len[row_indx]++;
752 
753       rows->ptrs[row_indx][len] = mapping[col];
754       rows->A[row_indx][len]    = vals[j];
755     }
756     rows->slen[row_indx] = rows->len[row_indx];
757 
758     ierr = MatRestoreRow(A,i,&nz,&cols,&vals);CHKERRQ(ierr);
759   }
760 
761 
762   /************************************************************/
763   /************** Set up the column structure *****************/
764   /*********************************************************/
765 
766   if (AT) {
767 
768     /* count number of nonzeros in every column */
769     for (i=rstart; i<rend; i++) {
770       ierr = MatGetRow(AT,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
771       ierr = MatRestoreRow(AT,i,&num_ptr[i-rstart],NULL,NULL);CHKERRQ(ierr);
772     }
773 
774     /* allocate buffers */
775     len = 0;
776     for (i=0; i<mnl; i++) {
777       if (len < num_ptr[i]) len = num_ptr[i];
778     }
779 
780     for (i=rstart; i<rend; i++) {
781       row_indx = i-rstart;
782       len      = num_ptr[row_indx];
783 
784       rows->rptrs[row_indx] = (int*)malloc(len*sizeof(int));
785     }
786 
787     /* copy the matrix (i.e., the structure) */
788     for (i=rstart; i<rend; i++) {
789       row_indx = i - rstart;
790       ierr     = MatGetRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
791       for (j=0; j<nz; j++) {
792         col = cols[j];
793         len = rows->rlen[row_indx]++;
794 
795         rows->rptrs[row_indx][len] = mapping[col];
796       }
797       ierr = MatRestoreRow(AT,i,&nz,&cols,&vals);CHKERRQ(ierr);
798     }
799   }
800 
801   ierr = PetscFree(num_ptr);CHKERRQ(ierr);
802   ierr = PetscFree(mapping);CHKERRQ(ierr);
803 
804   order_pointers(M);
805   M->maxnz = calc_maxnz(M);
806   *B       = M;
807   PetscFunctionReturn(0);
808 }
809 
810 /**********************************************************************/
811 
812 /*
813    Converts from an SPAI matrix B  to a PETSc matrix PB.
814    This assumes that the the SPAI matrix B is stored in
815    COMPRESSED-ROW format.
816 */
817 #undef __FUNCT__
818 #define __FUNCT__ "ConvertMatrixToMat"
819 PetscErrorCode ConvertMatrixToMat(MPI_Comm comm,matrix *B,Mat *PB)
820 {
821   PetscMPIInt    size,rank;
822   PetscErrorCode ierr;
823   int            m,n,M,N;
824   int            d_nz,o_nz;
825   int            *d_nnz,*o_nnz;
826   int            i,k,global_row,global_col,first_diag_col,last_diag_col;
827   PetscScalar    val;
828 
829   PetscFunctionBegin;
830   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
831   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
832 
833   m    = n = B->mnls[rank];
834   d_nz = o_nz = 0;
835 
836   /* Determine preallocation for MatCreateMPIAIJ */
837   ierr = PetscMalloc(m*sizeof(PetscInt),&d_nnz);CHKERRQ(ierr);
838   ierr = PetscMalloc(m*sizeof(PetscInt),&o_nnz);CHKERRQ(ierr);
839   for (i=0; i<m; i++) d_nnz[i] = o_nnz[i] = 0;
840   first_diag_col = B->start_indices[rank];
841   last_diag_col  = first_diag_col + B->mnls[rank];
842   for (i=0; i<B->mnls[rank]; i++) {
843     for (k=0; k<B->lines->len[i]; k++) {
844       global_col = B->lines->ptrs[i][k];
845       if ((global_col >= first_diag_col) && (global_col < last_diag_col)) d_nnz[i]++;
846       else o_nnz[i]++;
847     }
848   }
849 
850   M = N = B->n;
851   /* Here we only know how to create AIJ format */
852   ierr = MatCreate(comm,PB);CHKERRQ(ierr);
853   ierr = MatSetSizes(*PB,m,n,M,N);CHKERRQ(ierr);
854   ierr = MatSetType(*PB,MATAIJ);CHKERRQ(ierr);
855   ierr = MatSeqAIJSetPreallocation(*PB,d_nz,d_nnz);CHKERRQ(ierr);
856   ierr = MatMPIAIJSetPreallocation(*PB,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
857 
858   for (i=0; i<B->mnls[rank]; i++) {
859     global_row = B->start_indices[rank]+i;
860     for (k=0; k<B->lines->len[i]; k++) {
861       global_col = B->lines->ptrs[i][k];
862 
863       val  = B->lines->A[i][k];
864       ierr = MatSetValues(*PB,1,&global_row,1,&global_col,&val,ADD_VALUES);CHKERRQ(ierr);
865     }
866   }
867 
868   ierr = PetscFree(d_nnz);CHKERRQ(ierr);
869   ierr = PetscFree(o_nnz);CHKERRQ(ierr);
870 
871   ierr = MatAssemblyBegin(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
872   ierr = MatAssemblyEnd(*PB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
873   PetscFunctionReturn(0);
874 }
875 
876 /**********************************************************************/
877 
878 /*
879    Converts from an SPAI vector v  to a PETSc vec Pv.
880 */
881 #undef __FUNCT__
882 #define __FUNCT__ "ConvertVectorToVec"
883 PetscErrorCode ConvertVectorToVec(MPI_Comm comm,vector *v,Vec *Pv)
884 {
885   PetscErrorCode ierr;
886   PetscMPIInt    size,rank;
887   int            m,M,i,*mnls,*start_indices,*global_indices;
888 
889   PetscFunctionBegin;
890   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
891   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
892 
893   m = v->mnl;
894   M = v->n;
895 
896 
897   ierr = VecCreateMPI(comm,m,M,Pv);CHKERRQ(ierr);
898 
899   ierr = PetscMalloc(size*sizeof(int),&mnls);CHKERRQ(ierr);
900   ierr = MPI_Allgather(&v->mnl,1,MPI_INT,mnls,1,MPI_INT,comm);CHKERRQ(ierr);
901 
902   ierr = PetscMalloc(size*sizeof(int),&start_indices);CHKERRQ(ierr);
903 
904   start_indices[0] = 0;
905   for (i=1; i<size; i++) start_indices[i] = start_indices[i-1] +mnls[i-1];
906 
907   ierr = PetscMalloc(v->mnl*sizeof(int),&global_indices);CHKERRQ(ierr);
908   for (i=0; i<v->mnl; i++) global_indices[i] = start_indices[rank] + i;
909 
910   ierr = PetscFree(mnls);CHKERRQ(ierr);
911   ierr = PetscFree(start_indices);CHKERRQ(ierr);
912 
913   ierr = VecSetValues(*Pv,v->mnl,global_indices,v->v,INSERT_VALUES);CHKERRQ(ierr);
914   ierr = VecAssemblyBegin(*Pv);CHKERRQ(ierr);
915   ierr = VecAssemblyEnd(*Pv);CHKERRQ(ierr);
916 
917   ierr = PetscFree(global_indices);CHKERRQ(ierr);
918   PetscFunctionReturn(0);
919 }
920 
921 
922 
923 
924