xref: /petsc/src/mat/impls/kaij/kaij.c (revision 06d61a378946b6bd34af7e9a616ed6880d55aeca)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscErrorCode ierr;
145 
146   PetscFunctionBegin;
147   if (S) *S = NULL;
148   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*@C
153   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
154 
155   Not collective
156 
157   Input Parameter:
158 . A - the KAIJ matrix
159 
160   Output Parameter:
161 . S - location of pointer to array obtained with MatKAIJGetS()
162 
163   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164   If NULL is passed, it will not attempt to zero the array pointer.
165 
166   Level: advanced
167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170 {
171   PetscFunctionBegin;
172   if (S) *S = NULL;
173   PetscFunctionReturn(0);
174 }
175 
176 /*@C
177    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
178 
179    Not Collective; the entire T is stored and returned independently on all processes
180 
181    Input Parameter:
182 .  A - the KAIJ matrix
183 
184    Output Parameters:
185 +  m - the number of rows in T
186 .  n - the number of columns in T
187 -  T - the T matrix, in form of a scalar array in column-major format
188 
189    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
190 
191    Level: advanced
192 
193 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196 {
197   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
198   PetscFunctionBegin;
199   if (m) *m = b->p;
200   if (n) *n = b->q;
201   if (T) *T = b->T;
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
207 
208    Not Collective; the entire T is stored and returned independently on all processes
209 
210    Input Parameter:
211 .  A - the KAIJ matrix
212 
213    Output Parameters:
214 +  m - the number of rows in T
215 .  n - the number of columns in T
216 -  T - the T matrix, in form of a scalar array in column-major format
217 
218    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
219 
220    Level: advanced
221 
222 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223 @*/
224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225 {
226   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
227   PetscFunctionBegin;
228   if (m) *m = b->p;
229   if (n) *n = b->q;
230   if (T) *T = b->T;
231   PetscFunctionReturn(0);
232 }
233 
234 /*@C
235   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
236 
237   Not collective
238 
239   Input Parameter:
240 . A - the KAIJ matrix
241 
242   Output Parameter:
243 . T - location of pointer to array obtained with MatKAIJGetS()
244 
245   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246   If NULL is passed, it will not attempt to zero the array pointer.
247 
248   Level: advanced
249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250 @*/
251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252 {
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   if (T) *T = NULL;
257   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 
261 /*@C
262   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
263 
264   Not collective
265 
266   Input Parameter:
267 . A - the KAIJ matrix
268 
269   Output Parameter:
270 . T - location of pointer to array obtained with MatKAIJGetS()
271 
272   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273   If NULL is passed, it will not attempt to zero the array pointer.
274 
275   Level: advanced
276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277 @*/
278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279 {
280   PetscFunctionBegin;
281   if (T) *T = NULL;
282   PetscFunctionReturn(0);
283 }
284 
285 /*@
286    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
287 
288    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
289 
290    Input Parameters:
291 +  A - the KAIJ matrix
292 -  B - the AIJ matrix
293 
294    Notes:
295    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
297 
298    Level: advanced
299 
300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301 @*/
302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303 {
304   PetscErrorCode ierr;
305   PetscMPIInt    size;
306   PetscBool      flg;
307 
308   PetscFunctionBegin;
309   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
310   if (size == 1) {
311     ierr = PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg);CHKERRQ(ierr);
312     if (!flg) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name);
313     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
314     a->AIJ = B;
315   } else {
316     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
317     a->A = B;
318   }
319   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
320   PetscFunctionReturn(0);
321 }
322 
323 /*@C
324    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
325 
326    Logically Collective; the entire S is stored independently on all processes.
327 
328    Input Parameters:
329 +  A - the KAIJ matrix
330 .  p - the number of rows in S
331 .  q - the number of columns in S
332 -  S - the S matrix, in form of a scalar array in column-major format
333 
334    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
335    The S matrix is copied, so the user can destroy this array.
336 
337    Level: Advanced
338 
339 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
340 @*/
341 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
342 {
343   PetscErrorCode ierr;
344   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
345 
346   PetscFunctionBegin;
347   ierr = PetscFree(a->S);CHKERRQ(ierr);
348   if (S) {
349     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
350     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
351   } else  a->S = NULL;
352 
353   a->p = p;
354   a->q = q;
355   PetscFunctionReturn(0);
356 }
357 
358 /*@C
359    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
360 
361    Logically Collective.
362 
363    Input Parameter:
364 .  A - the KAIJ matrix
365 
366   Output Parameter:
367 .  identity - the Boolean value
368 
369    Level: Advanced
370 
371 .seealso: MatKAIJGetS(), MatKAIJGetT()
372 @*/
373 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
374 {
375   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
376   PetscInt    i,j;
377 
378   PetscFunctionBegin;
379   if (a->p != a->q) {
380     *identity = PETSC_FALSE;
381     PetscFunctionReturn(0);
382   } else *identity = PETSC_TRUE;
383   if (!a->isTI || a->S) {
384     for (i=0; i<a->p && *identity; i++) {
385       for (j=0; j<a->p && *identity; j++) {
386         if (i != j) {
387           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
388           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
389         } else {
390           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
391           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
392         }
393       }
394     }
395   }
396   PetscFunctionReturn(0);
397 }
398 
399 /*@C
400    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
401 
402    Logically Collective; the entire T is stored independently on all processes.
403 
404    Input Parameters:
405 +  A - the KAIJ matrix
406 .  p - the number of rows in S
407 .  q - the number of columns in S
408 -  T - the T matrix, in form of a scalar array in column-major format
409 
410    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
411    The T matrix is copied, so the user can destroy this array.
412 
413    Level: Advanced
414 
415 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
416 @*/
417 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
418 {
419   PetscErrorCode ierr;
420   PetscInt       i,j;
421   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
422   PetscBool      isTI = PETSC_FALSE;
423 
424   PetscFunctionBegin;
425   /* check if T is an identity matrix */
426   if (T && (p == q)) {
427     isTI = PETSC_TRUE;
428     for (i=0; i<p; i++) {
429       for (j=0; j<q; j++) {
430         if (i == j) {
431           /* diagonal term must be 1 */
432           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
433         } else {
434           /* off-diagonal term must be 0 */
435           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
436         }
437       }
438     }
439   }
440   a->isTI = isTI;
441 
442   ierr = PetscFree(a->T);CHKERRQ(ierr);
443   if (T && (!isTI)) {
444     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
445     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
446   } else a->T = NULL;
447 
448   a->p = p;
449   a->q = q;
450   PetscFunctionReturn(0);
451 }
452 
453 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
454 {
455   PetscErrorCode ierr;
456   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
457 
458   PetscFunctionBegin;
459   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
460   ierr = PetscFree(b->S);CHKERRQ(ierr);
461   ierr = PetscFree(b->T);CHKERRQ(ierr);
462   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
463   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
464   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL);CHKERRQ(ierr);
465   ierr = PetscFree(A->data);CHKERRQ(ierr);
466   PetscFunctionReturn(0);
467 }
468 
469 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
470 {
471   PetscErrorCode   ierr;
472   Mat_MPIKAIJ      *a;
473   Mat_MPIAIJ       *mpiaij;
474   PetscScalar      *T;
475   PetscInt         i,j;
476   PetscObjectState state;
477 
478   PetscFunctionBegin;
479   a = (Mat_MPIKAIJ*)A->data;
480   mpiaij = (Mat_MPIAIJ*)a->A->data;
481 
482   ierr = PetscObjectStateGet((PetscObject)a->A,&state);CHKERRQ(ierr);
483   if (state == a->state) {
484     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
485     PetscFunctionReturn(0);
486   } else {
487     ierr = MatDestroy(&a->AIJ);CHKERRQ(ierr);
488     ierr = MatDestroy(&a->OAIJ);CHKERRQ(ierr);
489     if (a->isTI) {
490       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
491        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
492        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
493        * to pass in. */
494       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
495       for (i=0; i<a->p; i++) {
496         for (j=0; j<a->q; j++) {
497           if (i==j) T[i+j*a->p] = 1.0;
498           else      T[i+j*a->p] = 0.0;
499         }
500       }
501     } else T = a->T;
502     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
503     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
504     if (a->isTI) {
505       ierr = PetscFree(T);CHKERRQ(ierr);
506     }
507     a->state = state;
508   }
509 
510   PetscFunctionReturn(0);
511 }
512 
513 PetscErrorCode MatSetUp_KAIJ(Mat A)
514 {
515   PetscErrorCode ierr;
516   PetscInt       n;
517   PetscMPIInt    size;
518   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
519 
520   PetscFunctionBegin;
521   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
522   if (size == 1) {
523     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
524     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
525     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
526     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
527     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
528   } else {
529     Mat_MPIKAIJ *a;
530     Mat_MPIAIJ  *mpiaij;
531     IS          from,to;
532     Vec         gvec;
533 
534     a = (Mat_MPIKAIJ*)A->data;
535     mpiaij = (Mat_MPIAIJ*)a->A->data;
536     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
537     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
538     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
539     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
540     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
541 
542     ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr);
543 
544     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
545     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
546     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
547     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
548     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
549 
550     /* create two temporary Index sets for build scatter gather */
551     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
552     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
553 
554     /* create temporary global vector to generate scatter context */
555     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
556 
557     /* generate the scatter context */
558     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
559 
560     ierr = ISDestroy(&from);CHKERRQ(ierr);
561     ierr = ISDestroy(&to);CHKERRQ(ierr);
562     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
563   }
564 
565   A->assembled = PETSC_TRUE;
566   PetscFunctionReturn(0);
567 }
568 
569 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
570 {
571   PetscViewerFormat format;
572   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
573   Mat               B;
574   PetscInt          i;
575   PetscErrorCode    ierr;
576   PetscBool         ismpikaij;
577 
578   PetscFunctionBegin;
579   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
580   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
581   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
582     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q);CHKERRQ(ierr);
583 
584     /* Print appropriate details for S. */
585     if (!a->S) {
586       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr);
587     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
588       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
589       for (i=0; i<(a->p * a->q); i++) {
590 #if defined(PETSC_USE_COMPLEX)
591         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
592 #else
593         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
594 #endif
595       }
596       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
597     }
598 
599     /* Print appropriate details for T. */
600     if (a->isTI) {
601       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr);
602     } else if (!a->T) {
603       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr);
604     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
605       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
606       for (i=0; i<(a->p * a->q); i++) {
607 #if defined(PETSC_USE_COMPLEX)
608         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
609 #else
610         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
611 #endif
612       }
613       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
614     }
615 
616     /* Now print details for the AIJ matrix, using the AIJ viewer. */
617     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
618     if (ismpikaij) {
619       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
620       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
621     } else {
622       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
623     }
624 
625   } else {
626     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
627     ierr = MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
628     ierr = MatView(B,viewer);CHKERRQ(ierr);
629     ierr = MatDestroy(&B);CHKERRQ(ierr);
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
635 {
636   PetscErrorCode ierr;
637   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
638 
639   PetscFunctionBegin;
640   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
641   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
642   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
643   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
644   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
645   ierr = PetscFree(b->S);CHKERRQ(ierr);
646   ierr = PetscFree(b->T);CHKERRQ(ierr);
647   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
648   ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
649   ierr = PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL);CHKERRQ(ierr);
650   ierr = PetscFree(A->data);CHKERRQ(ierr);
651   PetscFunctionReturn(0);
652 }
653 
654 /* --------------------------------------------------------------------------------------*/
655 
656 /* zz = yy + Axx */
657 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
658 {
659   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
660   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
661   const PetscScalar *s = b->S, *t = b->T;
662   const PetscScalar *x,*v,*bx;
663   PetscScalar       *y,*sums;
664   PetscErrorCode    ierr;
665   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
666   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
667 
668   PetscFunctionBegin;
669   if (!yy) {
670     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
671   } else {
672     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
673   }
674   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
675 
676   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
677   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
678   idx  = a->j;
679   v    = a->a;
680   ii   = a->i;
681 
682   if (b->isTI) {
683     for (i=0; i<m; i++) {
684       jrow = ii[i];
685       n    = ii[i+1] - jrow;
686       sums = y + p*i;
687       for (j=0; j<n; j++) {
688         for (k=0; k<p; k++) {
689           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
690         }
691       }
692     }
693     ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr);
694   } else if (t) {
695     for (i=0; i<m; i++) {
696       jrow = ii[i];
697       n    = ii[i+1] - jrow;
698       sums = y + p*i;
699       for (j=0; j<n; j++) {
700         for (k=0; k<p; k++) {
701           for (l=0; l<q; l++) {
702             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
703           }
704         }
705       }
706     }
707     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
708      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
709      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
710      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
711     ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr);
712   }
713   if (s) {
714     for (i=0; i<m; i++) {
715       sums = y + p*i;
716       bx   = x + q*i;
717       if (i < b->AIJ->cmap->n) {
718         for (j=0; j<q; j++) {
719           for (k=0; k<p; k++) {
720             sums[k] += s[k+j*p]*bx[j];
721           }
722         }
723       }
724     }
725     ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr);
726   }
727 
728   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
729   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
730   PetscFunctionReturn(0);
731 }
732 
733 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
734 {
735   PetscErrorCode ierr;
736   PetscFunctionBegin;
737   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
738   PetscFunctionReturn(0);
739 }
740 
741 #include <petsc/private/kernels/blockinvert.h>
742 
743 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
744 {
745   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
746   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
747   const PetscScalar *S  = b->S;
748   const PetscScalar *T  = b->T;
749   const PetscScalar *v  = a->a;
750   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
751   PetscErrorCode    ierr;
752   PetscInt          i,j,*v_pivots,dof,dof2;
753   PetscScalar       *diag,aval,*v_work;
754 
755   PetscFunctionBegin;
756   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
757   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
758 
759   dof  = p;
760   dof2 = dof*dof;
761 
762   if (b->ibdiagvalid) {
763     if (values) *values = b->ibdiag;
764     PetscFunctionReturn(0);
765   }
766   if (!b->ibdiag) {
767     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
768     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
769   }
770   if (values) *values = b->ibdiag;
771   diag = b->ibdiag;
772 
773   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
774   for (i=0; i<m; i++) {
775     if (S) {
776       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
777     } else {
778       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
779     }
780     if (b->isTI) {
781       aval = 0;
782       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
783       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
784     } else if (T) {
785       aval = 0;
786       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
787       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
788     }
789     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
790     diag += dof2;
791   }
792   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
793 
794   b->ibdiagvalid = PETSC_TRUE;
795   PetscFunctionReturn(0);
796 }
797 
798 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
799 {
800   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
801 
802   PetscFunctionBegin;
803   *B = kaij->AIJ;
804   PetscFunctionReturn(0);
805 }
806 
807 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
808 {
809   PetscErrorCode    ierr;
810   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
811   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
812   const PetscScalar *aa = a->a, *T = kaij->T, *v;
813   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
814   const PetscScalar *b, *xb, *idiag;
815   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
816   PetscInt          i, j, k, i2, bs, bs2, nz;
817 
818   PetscFunctionBegin;
819   its = its*lits;
820   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
821   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
822   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
823   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
824   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
825   else        {bs = p; bs2 = bs*bs; }
826 
827   if (!m) PetscFunctionReturn(0);
828 
829   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
830   idiag = kaij->ibdiag;
831   diag  = a->diag;
832 
833   if (!kaij->sor.setup) {
834     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
835     kaij->sor.setup = PETSC_TRUE;
836   }
837   y     = kaij->sor.y;
838   w     = kaij->sor.w;
839   work  = kaij->sor.work;
840   t     = kaij->sor.t;
841   arr   = kaij->sor.arr;
842 
843   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
844   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
845 
846   if (flag & SOR_ZERO_INITIAL_GUESS) {
847     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
848       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
849       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
850       i2     =  bs;
851       idiag  += bs2;
852       for (i=1; i<m; i++) {
853         v  = aa + ai[i];
854         vi = aj + ai[i];
855         nz = diag[i] - ai[i];
856 
857         if (T) {                /* b - T (Arow * x) */
858           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
859           for (j=0; j<nz; j++) {
860             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
861           }
862           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
863           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
864         } else if (kaij->isTI) {
865           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
866           for (j=0; j<nz; j++) {
867             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
868           }
869         } else {
870           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
871         }
872 
873         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
874         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
875 
876         idiag += bs2;
877         i2    += bs;
878       }
879       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
880       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
881       xb = t;
882     } else xb = b;
883     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
884       idiag = kaij->ibdiag+bs2*(m-1);
885       i2    = bs * (m-1);
886       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
887       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
888       i2    -= bs;
889       idiag -= bs2;
890       for (i=m-2; i>=0; i--) {
891         v  = aa + diag[i] + 1 ;
892         vi = aj + diag[i] + 1;
893         nz = ai[i+1] - diag[i] - 1;
894 
895         if (T) {                /* FIXME: This branch untested */
896           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
897           /* copy all rows of x that are needed into contiguous space */
898           workt = work;
899           for (j=0; j<nz; j++) {
900             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
901             workt += bs;
902           }
903           arrt = arr;
904           for (j=0; j<nz; j++) {
905             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
906             for (k=0; k<bs2; k++) arrt[k] *= v[j];
907             arrt += bs2;
908           }
909           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
910         } else if (kaij->isTI) {
911           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
912           for (j=0; j<nz; j++) {
913             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
914           }
915         }
916 
917         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
918         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
919 
920         idiag -= bs2;
921         i2    -= bs;
922       }
923       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
924     }
925     its--;
926   }
927   while (its--) {               /* FIXME: This branch not updated */
928     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
929       i2     =  0;
930       idiag  = kaij->ibdiag;
931       for (i=0; i<m; i++) {
932         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
933 
934         v  = aa + ai[i];
935         vi = aj + ai[i];
936         nz = diag[i] - ai[i];
937         workt = work;
938         for (j=0; j<nz; j++) {
939           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
940           workt += bs;
941         }
942         arrt = arr;
943         if (T) {
944           for (j=0; j<nz; j++) {
945             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
946             for (k=0; k<bs2; k++) arrt[k] *= v[j];
947             arrt += bs2;
948           }
949           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
950         } else if (kaij->isTI) {
951           for (j=0; j<nz; j++) {
952             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
953             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
954             arrt += bs2;
955           }
956           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
957         }
958         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
959 
960         v  = aa + diag[i] + 1;
961         vi = aj + diag[i] + 1;
962         nz = ai[i+1] - diag[i] - 1;
963         workt = work;
964         for (j=0; j<nz; j++) {
965           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
966           workt += bs;
967         }
968         arrt = arr;
969         if (T) {
970           for (j=0; j<nz; j++) {
971             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
972             for (k=0; k<bs2; k++) arrt[k] *= v[j];
973             arrt += bs2;
974           }
975           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
976         } else if (kaij->isTI) {
977           for (j=0; j<nz; j++) {
978             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
979             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
980             arrt += bs2;
981           }
982           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
983         }
984 
985         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
986         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
987 
988         idiag += bs2;
989         i2    += bs;
990       }
991       xb = t;
992     }
993     else xb = b;
994     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
995       idiag = kaij->ibdiag+bs2*(m-1);
996       i2    = bs * (m-1);
997       if (xb == b) {
998         for (i=m-1; i>=0; i--) {
999           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1000 
1001           v  = aa + ai[i];
1002           vi = aj + ai[i];
1003           nz = diag[i] - ai[i];
1004           workt = work;
1005           for (j=0; j<nz; j++) {
1006             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1007             workt += bs;
1008           }
1009           arrt = arr;
1010           if (T) {
1011             for (j=0; j<nz; j++) {
1012               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1013               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1014               arrt += bs2;
1015             }
1016             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1017           } else if (kaij->isTI) {
1018             for (j=0; j<nz; j++) {
1019               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1020               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1021               arrt += bs2;
1022             }
1023             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1024           }
1025 
1026           v  = aa + diag[i] + 1;
1027           vi = aj + diag[i] + 1;
1028           nz = ai[i+1] - diag[i] - 1;
1029           workt = work;
1030           for (j=0; j<nz; j++) {
1031             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1032             workt += bs;
1033           }
1034           arrt = arr;
1035           if (T) {
1036             for (j=0; j<nz; j++) {
1037               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1038               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1039               arrt += bs2;
1040             }
1041             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1042           } else if (kaij->isTI) {
1043             for (j=0; j<nz; j++) {
1044               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1045               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1046               arrt += bs2;
1047             }
1048             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1049           }
1050 
1051           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1052           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1053         }
1054       } else {
1055         for (i=m-1; i>=0; i--) {
1056           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1057           v  = aa + diag[i] + 1;
1058           vi = aj + diag[i] + 1;
1059           nz = ai[i+1] - diag[i] - 1;
1060           workt = work;
1061           for (j=0; j<nz; j++) {
1062             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1063             workt += bs;
1064           }
1065           arrt = arr;
1066           if (T) {
1067             for (j=0; j<nz; j++) {
1068               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1069               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1070               arrt += bs2;
1071             }
1072             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1073           } else if (kaij->isTI) {
1074             for (j=0; j<nz; j++) {
1075               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1076               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1077               arrt += bs2;
1078             }
1079             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1080           }
1081           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1082           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1083         }
1084       }
1085       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1086     }
1087   }
1088 
1089   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1090   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 /*===================================================================================*/
1095 
1096 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1097 {
1098   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   if (!yy) {
1103     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
1104   } else {
1105     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1106   }
1107   ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ and b->OAIJ are up to date. */
1108   /* start the scatter */
1109   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1110   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
1111   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1112   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1117 {
1118   PetscErrorCode ierr;
1119   PetscFunctionBegin;
1120   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1125 {
1126   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1127   PetscErrorCode  ierr;
1128 
1129   PetscFunctionBegin;
1130   ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ is up to date. */
1131   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 /* ----------------------------------------------------------------*/
1136 
1137 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1138 {
1139   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1140   PetscErrorCode  diag = PETSC_FALSE;
1141   PetscErrorCode  ierr;
1142   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1143   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1144 
1145   PetscFunctionBegin;
1146   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1147   b->getrowactive = PETSC_TRUE;
1148   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row);
1149 
1150   if ((!S) && (!T) && (!b->isTI)) {
1151     if (ncols)    *ncols  = 0;
1152     if (cols)     *cols   = NULL;
1153     if (values)   *values = NULL;
1154     PetscFunctionReturn(0);
1155   }
1156 
1157   if (T || b->isTI) {
1158     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1159     c     = nzaij;
1160     for (i=0; i<nzaij; i++) {
1161       /* check if this row contains a diagonal entry */
1162       if (colsaij[i] == r) {
1163         diag = PETSC_TRUE;
1164         c = i;
1165       }
1166     }
1167   } else nzaij = c = 0;
1168 
1169   /* calculate size of row */
1170   nz = 0;
1171   if (S)            nz += q;
1172   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1173 
1174   if (cols || values) {
1175     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1176     for (i=0; i<q; i++) {
1177       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1178       v[i] = 0.0;
1179     }
1180     if (b->isTI) {
1181       for (i=0; i<nzaij; i++) {
1182         for (j=0; j<q; j++) {
1183           idx[i*q+j] = colsaij[i]*q+j;
1184           v[i*q+j]   = (j==s ? vaij[i] : 0);
1185         }
1186       }
1187     } else if (T) {
1188       for (i=0; i<nzaij; i++) {
1189         for (j=0; j<q; j++) {
1190           idx[i*q+j] = colsaij[i]*q+j;
1191           v[i*q+j]   = vaij[i]*T[s+j*p];
1192         }
1193       }
1194     }
1195     if (S) {
1196       for (j=0; j<q; j++) {
1197         idx[c*q+j] = r*q+j;
1198         v[c*q+j]  += S[s+j*p];
1199       }
1200     }
1201   }
1202 
1203   if (ncols)    *ncols  = nz;
1204   if (cols)     *cols   = idx;
1205   if (values)   *values = v;
1206   PetscFunctionReturn(0);
1207 }
1208 
1209 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1210 {
1211   PetscErrorCode ierr;
1212 
1213   PetscFunctionBegin;
1214   if (nz) *nz = 0;
1215   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1216   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1221 {
1222   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1223   Mat             AIJ     = b->A;
1224   PetscBool       diag    = PETSC_FALSE;
1225   Mat             MatAIJ,MatOAIJ;
1226   PetscErrorCode  ierr;
1227   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1228   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1229   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1230 
1231   PetscFunctionBegin;
1232   ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr); /* Ensure b->AIJ and b->OAIJ are up to date. */
1233   MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1234   MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1235   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1236   b->getrowactive = PETSC_TRUE;
1237   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1238   lrow = row - rstart;
1239 
1240   if ((!S) && (!T) && (!b->isTI)) {
1241     if (ncols)    *ncols  = 0;
1242     if (cols)     *cols   = NULL;
1243     if (values)   *values = NULL;
1244     PetscFunctionReturn(0);
1245   }
1246 
1247   r = lrow/p;
1248   s = lrow%p;
1249 
1250   if (T || b->isTI) {
1251     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr);
1252     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1253     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1254 
1255     c     = ncolsaij + ncolsoaij;
1256     for (i=0; i<ncolsaij; i++) {
1257       /* check if this row contains a diagonal entry */
1258       if (colsaij[i] == r) {
1259         diag = PETSC_TRUE;
1260         c = i;
1261       }
1262     }
1263   } else c = 0;
1264 
1265   /* calculate size of row */
1266   nz = 0;
1267   if (S)            nz += q;
1268   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1269 
1270   if (cols || values) {
1271     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1272     for (i=0; i<q; i++) {
1273       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1274       v[i] = 0.0;
1275     }
1276     if (b->isTI) {
1277       for (i=0; i<ncolsaij; i++) {
1278         for (j=0; j<q; j++) {
1279           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1280           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1281         }
1282       }
1283       for (i=0; i<ncolsoaij; i++) {
1284         for (j=0; j<q; j++) {
1285           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1286           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1287         }
1288       }
1289     } else if (T) {
1290       for (i=0; i<ncolsaij; i++) {
1291         for (j=0; j<q; j++) {
1292           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1293           v[i*q+j]   = vals[i]*T[s+j*p];
1294         }
1295       }
1296       for (i=0; i<ncolsoaij; i++) {
1297         for (j=0; j<q; j++) {
1298           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1299           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1300         }
1301       }
1302     }
1303     if (S) {
1304       for (j=0; j<q; j++) {
1305         idx[c*q+j] = (r+rstart/p)*q+j;
1306         v[c*q+j]  += S[s+j*p];
1307       }
1308     }
1309   }
1310 
1311   if (ncols)  *ncols  = nz;
1312   if (cols)   *cols   = idx;
1313   if (values) *values = v;
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1318 {
1319   PetscErrorCode ierr;
1320   PetscFunctionBegin;
1321   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1322   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1323   PetscFunctionReturn(0);
1324 }
1325 
1326 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1327 {
1328   PetscErrorCode ierr;
1329   Mat            A;
1330 
1331   PetscFunctionBegin;
1332   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1333   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1334   ierr = MatDestroy(&A);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 /* ---------------------------------------------------------------------------------- */
1339 /*@C
1340   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1341 
1342     [I \otimes S + A \otimes T]
1343 
1344   where
1345     S is a dense (p \times q) matrix
1346     T is a dense (p \times q) matrix
1347     A is an AIJ  (n \times n) matrix
1348     I is the identity matrix
1349   The resulting matrix is (np \times nq)
1350 
1351   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1352 
1353   Collective
1354 
1355   Input Parameters:
1356 + A - the AIJ matrix
1357 . p - number of rows in S and T
1358 . q - number of columns in S and T
1359 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1360 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1361 
1362   Output Parameter:
1363 . kaij - the new KAIJ matrix
1364 
1365   Notes:
1366   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1367   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1368 
1369   Developer Notes:
1370   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1371   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1372   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1373   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1374 
1375   Level: advanced
1376 
1377 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1378 @*/
1379 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1380 {
1381   PetscErrorCode ierr;
1382 
1383   PetscFunctionBegin;
1384   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1385   ierr = MatSetType(*kaij,MATKAIJ);CHKERRQ(ierr);
1386   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1387   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1388   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1389   ierr = MatSetUp(*kaij);CHKERRQ(ierr);
1390   PetscFunctionReturn(0);
1391 }
1392 
1393 /*MC
1394   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1395     [I \otimes S + A \otimes T],
1396   where
1397     S is a dense (p \times q) matrix,
1398     T is a dense (p \times q) matrix,
1399     A is an AIJ  (n \times n) matrix,
1400     and I is the identity matrix.
1401   The resulting matrix is (np \times nq).
1402 
1403   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1404 
1405   Notes:
1406   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1407   where x and b are column vectors containing the row-major representations of X and B.
1408 
1409   Level: advanced
1410 
1411 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1412 M*/
1413 
1414 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1415 {
1416   PetscErrorCode ierr;
1417   Mat_MPIKAIJ    *b;
1418   PetscMPIInt    size;
1419 
1420   PetscFunctionBegin;
1421   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1422   A->data  = (void*)b;
1423 
1424   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1425 
1426   b->w    = NULL;
1427   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1428   if (size == 1) {
1429     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1430     A->ops->destroy             = MatDestroy_SeqKAIJ;
1431     A->ops->mult                = MatMult_SeqKAIJ;
1432     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1433     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1434     A->ops->getrow              = MatGetRow_SeqKAIJ;
1435     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1436     A->ops->sor                 = MatSOR_SeqKAIJ;
1437   } else {
1438     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1439     A->ops->destroy             = MatDestroy_MPIKAIJ;
1440     A->ops->mult                = MatMult_MPIKAIJ;
1441     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1442     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1443     A->ops->getrow              = MatGetRow_MPIKAIJ;
1444     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1445     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1446   }
1447   A->ops->setup           = MatSetUp_KAIJ;
1448   A->ops->view            = MatView_KAIJ;
1449   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1450   PetscFunctionReturn(0);
1451 }
1452