xref: /petsc/src/mat/impls/kaij/kaij.c (revision eacc18261942eaffc89f187867aa512fa9f150c5)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscErrorCode ierr;
145 
146   PetscFunctionBegin;
147   if (S) *S = NULL;
148   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*@C
153   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
154 
155   Not collective
156 
157   Input Parameter:
158 . A - the KAIJ matrix
159 
160   Output Parameter:
161 . S - location of pointer to array obtained with MatKAIJGetS()
162 
163   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164   If NULL is passed, it will not attempt to zero the array pointer.
165 
166   Level: advanced
167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170 {
171   PetscFunctionBegin;
172   if (S) *S = NULL;
173   PetscFunctionReturn(0);
174 }
175 
176 /*@C
177    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
178 
179    Not Collective; the entire T is stored and returned independently on all processes
180 
181    Input Parameter:
182 .  A - the KAIJ matrix
183 
184    Output Parameters:
185 +  m - the number of rows in T
186 .  n - the number of columns in T
187 -  T - the T matrix, in form of a scalar array in column-major format
188 
189    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
190 
191    Level: advanced
192 
193 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196 {
197   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
198   PetscFunctionBegin;
199   if (m) *m = b->p;
200   if (n) *n = b->q;
201   if (T) *T = b->T;
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
207 
208    Not Collective; the entire T is stored and returned independently on all processes
209 
210    Input Parameter:
211 .  A - the KAIJ matrix
212 
213    Output Parameters:
214 +  m - the number of rows in T
215 .  n - the number of columns in T
216 -  T - the T matrix, in form of a scalar array in column-major format
217 
218    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
219 
220    Level: advanced
221 
222 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223 @*/
224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225 {
226   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
227   PetscFunctionBegin;
228   if (m) *m = b->p;
229   if (n) *n = b->q;
230   if (T) *T = b->T;
231   PetscFunctionReturn(0);
232 }
233 
234 /*@C
235   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
236 
237   Not collective
238 
239   Input Parameter:
240 . A - the KAIJ matrix
241 
242   Output Parameter:
243 . T - location of pointer to array obtained with MatKAIJGetS()
244 
245   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246   If NULL is passed, it will not attempt to zero the array pointer.
247 
248   Level: advanced
249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250 @*/
251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252 {
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   if (T) *T = NULL;
257   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 
261 /*@C
262   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
263 
264   Not collective
265 
266   Input Parameter:
267 . A - the KAIJ matrix
268 
269   Output Parameter:
270 . T - location of pointer to array obtained with MatKAIJGetS()
271 
272   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273   If NULL is passed, it will not attempt to zero the array pointer.
274 
275   Level: advanced
276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277 @*/
278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279 {
280   PetscFunctionBegin;
281   if (T) *T = NULL;
282   PetscFunctionReturn(0);
283 }
284 
285 /*@
286    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
287 
288    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
289 
290    Input Parameters:
291 +  A - the KAIJ matrix
292 -  B - the AIJ matrix
293 
294    Notes:
295    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
297 
298    Level: advanced
299 
300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301 @*/
302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303 {
304   PetscErrorCode ierr;
305   PetscMPIInt    size;
306 
307   PetscFunctionBegin;
308   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
309   if (size == 1) {
310     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311     a->AIJ = B;
312   } else {
313     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314     a->A = B;
315   }
316   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 /*@C
321    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
322 
323    Logically Collective; the entire S is stored independently on all processes.
324 
325    Input Parameters:
326 +  A - the KAIJ matrix
327 .  p - the number of rows in S
328 .  q - the number of columns in S
329 -  S - the S matrix, in form of a scalar array in column-major format
330 
331    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332    The S matrix is copied, so the user can destroy this array.
333 
334    Level: Advanced
335 
336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337 @*/
338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339 {
340   PetscErrorCode ierr;
341   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = PetscFree(a->S);CHKERRQ(ierr);
345   if (S) {
346     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
347     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
348   } else  a->S = NULL;
349 
350   a->p = p;
351   a->q = q;
352   PetscFunctionReturn(0);
353 }
354 
355 /*@C
356    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
357 
358    Logically Collective.
359 
360    Input Parameter:
361 .  A - the KAIJ matrix
362 
363   Output Parameter:
364 .  identity - the Boolean value
365 
366    Level: Advanced
367 
368 .seealso: MatKAIJGetS(), MatKAIJGetT()
369 @*/
370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
371 {
372   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
373   PetscInt    i,j;
374 
375   PetscFunctionBegin;
376   if (a->p != a->q) {
377     *identity = PETSC_FALSE;
378     PetscFunctionReturn(0);
379   } else *identity = PETSC_TRUE;
380   if (!a->isTI || a->S) {
381     for (i=0; i<a->p && *identity; i++) {
382       for (j=0; j<a->p && *identity; j++) {
383         if (i != j) {
384           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
386         } else {
387           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
389         }
390       }
391     }
392   }
393   PetscFunctionReturn(0);
394 }
395 
396 /*@C
397    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
398 
399    Logically Collective; the entire T is stored independently on all processes.
400 
401    Input Parameters:
402 +  A - the KAIJ matrix
403 .  p - the number of rows in S
404 .  q - the number of columns in S
405 -  T - the T matrix, in form of a scalar array in column-major format
406 
407    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
408    The T matrix is copied, so the user can destroy this array.
409 
410    Level: Advanced
411 
412 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
413 @*/
414 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
415 {
416   PetscErrorCode ierr;
417   PetscInt       i,j;
418   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
419   PetscBool      isTI = PETSC_FALSE;
420 
421   PetscFunctionBegin;
422   /* check if T is an identity matrix */
423   if (T && (p == q)) {
424     isTI = PETSC_TRUE;
425     for (i=0; i<p; i++) {
426       for (j=0; j<q; j++) {
427         if (i == j) {
428           /* diagonal term must be 1 */
429           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
430         } else {
431           /* off-diagonal term must be 0 */
432           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
433         }
434       }
435     }
436   }
437   a->isTI = isTI;
438 
439   ierr = PetscFree(a->T);CHKERRQ(ierr);
440   if (T && (!isTI)) {
441     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
442     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
443   } else a->T = NULL;
444 
445   a->p = p;
446   a->q = q;
447   PetscFunctionReturn(0);
448 }
449 
450 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
451 {
452   PetscErrorCode ierr;
453   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
454 
455   PetscFunctionBegin;
456   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
457   ierr = PetscFree(b->S);CHKERRQ(ierr);
458   ierr = PetscFree(b->T);CHKERRQ(ierr);
459   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
460   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
461   ierr = PetscFree(A->data);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
466 {
467   PetscErrorCode   ierr;
468   Mat_MPIKAIJ      *a;
469   Mat_MPIAIJ       *mpiaij;
470   PetscScalar      *T;
471   PetscInt         i,j;
472   PetscObjectState state;
473 
474   PetscFunctionBegin;
475   a = (Mat_MPIKAIJ*)A->data;
476   mpiaij = (Mat_MPIAIJ*)a->A->data;
477 
478   ierr = PetscObjectStateGet((PetscObject)a->A,&state);CHKERRQ(ierr);
479   if (state == a->state) {
480     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
481     PetscFunctionReturn(0);
482   } else {
483     ierr = MatDestroy(&a->AIJ);CHKERRQ(ierr);
484     ierr = MatDestroy(&a->OAIJ);CHKERRQ(ierr);
485     if (a->isTI) {
486       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
487        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
488        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
489        * to pass in. */
490       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
491       for (i=0; i<a->p; i++) {
492         for (j=0; j<a->q; j++) {
493           if (i==j) T[i+j*a->p] = 1.0;
494           else      T[i+j*a->p] = 0.0;
495         }
496       }
497     } else T = a->T;
498     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
499     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
500     if (a->isTI) {
501       ierr = PetscFree(T);CHKERRQ(ierr);
502     }
503     a->state = state;
504   }
505 
506   PetscFunctionReturn(0);
507 }
508 
509 PetscErrorCode MatSetUp_KAIJ(Mat A)
510 {
511   PetscErrorCode ierr;
512   PetscInt       n;
513   PetscMPIInt    size;
514   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
515 
516   PetscFunctionBegin;
517   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
518   if (size == 1) {
519     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
520     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
521     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
522     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
523     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
524   } else {
525     Mat_MPIKAIJ *a;
526     Mat_MPIAIJ  *mpiaij;
527     IS          from,to;
528     Vec         gvec;
529 
530     a = (Mat_MPIKAIJ*)A->data;
531     mpiaij = (Mat_MPIAIJ*)a->A->data;
532     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
533     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
534     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
535     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
536     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
537 
538     ierr = MatKAIJ_build_AIJ_OAIJ(A);CHKERRQ(ierr);
539 
540     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
541     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
542     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
543     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
544     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
545 
546     /* create two temporary Index sets for build scatter gather */
547     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
548     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
549 
550     /* create temporary global vector to generate scatter context */
551     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
552 
553     /* generate the scatter context */
554     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
555 
556     ierr = ISDestroy(&from);CHKERRQ(ierr);
557     ierr = ISDestroy(&to);CHKERRQ(ierr);
558     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
559   }
560 
561   A->assembled = PETSC_TRUE;
562   PetscFunctionReturn(0);
563 }
564 
565 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
566 {
567   PetscViewerFormat format;
568   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
569   Mat               B;
570   PetscInt          i;
571   PetscErrorCode    ierr;
572   PetscBool         ismpikaij;
573 
574   PetscFunctionBegin;
575   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
576   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
577   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
578     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
579 
580     /* Print appropriate details for S. */
581     if (!a->S) {
582       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr);
583     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
584       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
585       for (i=0; i<(a->p * a->q); i++) {
586 #if defined(PETSC_USE_COMPLEX)
587         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
588 #else
589         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
590 #endif
591       }
592       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
593     }
594 
595     /* Print appropriate details for T. */
596     if (a->isTI) {
597       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr);
598     } else if (!a->T) {
599       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr);
600     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
601       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
602       for (i=0; i<(a->p * a->q); i++) {
603 #if defined(PETSC_USE_COMPLEX)
604         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
605 #else
606         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
607 #endif
608       }
609       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
610     }
611 
612     /* Now print details for the AIJ matrix, using the AIJ viewer. */
613     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
614     if (ismpikaij) {
615       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
616       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
617     } else {
618       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
619     }
620 
621   } else {
622     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
623     if (ismpikaij) {
624       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
625     } else {
626       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
627     }
628     ierr = MatView(B,viewer);CHKERRQ(ierr);
629     ierr = MatDestroy(&B);CHKERRQ(ierr);
630   }
631   PetscFunctionReturn(0);
632 }
633 
634 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
635 {
636   PetscErrorCode ierr;
637   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
638 
639   PetscFunctionBegin;
640   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
641   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
642   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
643   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
644   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
645   ierr = PetscFree(b->S);CHKERRQ(ierr);
646   ierr = PetscFree(b->T);CHKERRQ(ierr);
647   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
648   ierr = PetscFree(A->data);CHKERRQ(ierr);
649   PetscFunctionReturn(0);
650 }
651 
652 /* --------------------------------------------------------------------------------------*/
653 
654 /* zz = yy + Axx */
655 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
656 {
657   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
658   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
659   const PetscScalar *s = b->S, *t = b->T;
660   const PetscScalar *x,*v,*bx;
661   PetscScalar       *y,*sums;
662   PetscErrorCode    ierr;
663   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
664   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
665 
666   PetscFunctionBegin;
667   if (!yy) {
668     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
669   } else {
670     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
671   }
672   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
673 
674   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
675   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
676   idx  = a->j;
677   v    = a->a;
678   ii   = a->i;
679 
680   if (b->isTI) {
681     for (i=0; i<m; i++) {
682       jrow = ii[i];
683       n    = ii[i+1] - jrow;
684       sums = y + p*i;
685       for (j=0; j<n; j++) {
686         for (k=0; k<p; k++) {
687           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
688         }
689       }
690     }
691     ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr);
692   } else if (t) {
693     for (i=0; i<m; i++) {
694       jrow = ii[i];
695       n    = ii[i+1] - jrow;
696       sums = y + p*i;
697       for (j=0; j<n; j++) {
698         for (k=0; k<p; k++) {
699           for (l=0; l<q; l++) {
700             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
701           }
702         }
703       }
704     }
705     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
706      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
707      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
708      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
709     ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr);
710   }
711   if (s) {
712     for (i=0; i<m; i++) {
713       sums = y + p*i;
714       bx   = x + q*i;
715       if (i < b->AIJ->cmap->n) {
716         for (j=0; j<q; j++) {
717           for (k=0; k<p; k++) {
718             sums[k] += s[k+j*p]*bx[j];
719           }
720         }
721       }
722     }
723     ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr);
724   }
725 
726   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
727   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
728   PetscFunctionReturn(0);
729 }
730 
731 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
732 {
733   PetscErrorCode ierr;
734   PetscFunctionBegin;
735   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
736   PetscFunctionReturn(0);
737 }
738 
739 #include <petsc/private/kernels/blockinvert.h>
740 
741 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
742 {
743   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
744   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
745   const PetscScalar *S  = b->S;
746   const PetscScalar *T  = b->T;
747   const PetscScalar *v  = a->a;
748   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
749   PetscErrorCode    ierr;
750   PetscInt          i,j,*v_pivots,dof,dof2;
751   PetscScalar       *diag,aval,*v_work;
752 
753   PetscFunctionBegin;
754   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
755   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
756 
757   dof  = p;
758   dof2 = dof*dof;
759 
760   if (b->ibdiagvalid) {
761     if (values) *values = b->ibdiag;
762     PetscFunctionReturn(0);
763   }
764   if (!b->ibdiag) {
765     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
766     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
767   }
768   if (values) *values = b->ibdiag;
769   diag = b->ibdiag;
770 
771   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
772   for (i=0; i<m; i++) {
773     if (S) {
774       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
775     } else {
776       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
777     }
778     if (b->isTI) {
779       aval = 0;
780       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
781       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
782     } else if (T) {
783       aval = 0;
784       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
785       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
786     }
787     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
788     diag += dof2;
789   }
790   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
791 
792   b->ibdiagvalid = PETSC_TRUE;
793   PetscFunctionReturn(0);
794 }
795 
796 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
797 {
798   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
799 
800   PetscFunctionBegin;
801   *B = kaij->AIJ;
802   PetscFunctionReturn(0);
803 }
804 
805 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
806 {
807   PetscErrorCode    ierr;
808   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
809   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
810   const PetscScalar *aa = a->a, *T = kaij->T, *v;
811   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
812   const PetscScalar *b, *xb, *idiag;
813   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
814   PetscInt          i, j, k, i2, bs, bs2, nz;
815 
816   PetscFunctionBegin;
817   its = its*lits;
818   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
819   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
820   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
821   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
822   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
823   else        {bs = p; bs2 = bs*bs; }
824 
825   if (!m) PetscFunctionReturn(0);
826 
827   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
828   idiag = kaij->ibdiag;
829   diag  = a->diag;
830 
831   if (!kaij->sor.setup) {
832     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
833     kaij->sor.setup = PETSC_TRUE;
834   }
835   y     = kaij->sor.y;
836   w     = kaij->sor.w;
837   work  = kaij->sor.work;
838   t     = kaij->sor.t;
839   arr   = kaij->sor.arr;
840 
841   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
842   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
843 
844   if (flag & SOR_ZERO_INITIAL_GUESS) {
845     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
846       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
847       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
848       i2     =  bs;
849       idiag  += bs2;
850       for (i=1; i<m; i++) {
851         v  = aa + ai[i];
852         vi = aj + ai[i];
853         nz = diag[i] - ai[i];
854 
855         if (T) {                /* b - T (Arow * x) */
856           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
857           for (j=0; j<nz; j++) {
858             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
859           }
860           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
861           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
862         } else if (kaij->isTI) {
863           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
864           for (j=0; j<nz; j++) {
865             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
866           }
867         } else {
868           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
869         }
870 
871         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
872         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
873 
874         idiag += bs2;
875         i2    += bs;
876       }
877       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
878       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
879       xb = t;
880     } else xb = b;
881     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
882       idiag = kaij->ibdiag+bs2*(m-1);
883       i2    = bs * (m-1);
884       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
885       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
886       i2    -= bs;
887       idiag -= bs2;
888       for (i=m-2; i>=0; i--) {
889         v  = aa + diag[i] + 1 ;
890         vi = aj + diag[i] + 1;
891         nz = ai[i+1] - diag[i] - 1;
892 
893         if (T) {                /* FIXME: This branch untested */
894           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
895           /* copy all rows of x that are needed into contiguous space */
896           workt = work;
897           for (j=0; j<nz; j++) {
898             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
899             workt += bs;
900           }
901           arrt = arr;
902           for (j=0; j<nz; j++) {
903             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
904             for (k=0; k<bs2; k++) arrt[k] *= v[j];
905             arrt += bs2;
906           }
907           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
908         } else if (kaij->isTI) {
909           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
910           for (j=0; j<nz; j++) {
911             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
912           }
913         }
914 
915         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
916         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
917 
918         idiag -= bs2;
919         i2    -= bs;
920       }
921       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
922     }
923     its--;
924   }
925   while (its--) {               /* FIXME: This branch not updated */
926     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
927       i2     =  0;
928       idiag  = kaij->ibdiag;
929       for (i=0; i<m; i++) {
930         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
931 
932         v  = aa + ai[i];
933         vi = aj + ai[i];
934         nz = diag[i] - ai[i];
935         workt = work;
936         for (j=0; j<nz; j++) {
937           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
938           workt += bs;
939         }
940         arrt = arr;
941         if (T) {
942           for (j=0; j<nz; j++) {
943             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
944             for (k=0; k<bs2; k++) arrt[k] *= v[j];
945             arrt += bs2;
946           }
947           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
948         } else if (kaij->isTI) {
949           for (j=0; j<nz; j++) {
950             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
951             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
952             arrt += bs2;
953           }
954           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
955         }
956         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
957 
958         v  = aa + diag[i] + 1;
959         vi = aj + diag[i] + 1;
960         nz = ai[i+1] - diag[i] - 1;
961         workt = work;
962         for (j=0; j<nz; j++) {
963           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
964           workt += bs;
965         }
966         arrt = arr;
967         if (T) {
968           for (j=0; j<nz; j++) {
969             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
970             for (k=0; k<bs2; k++) arrt[k] *= v[j];
971             arrt += bs2;
972           }
973           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
974         } else if (kaij->isTI) {
975           for (j=0; j<nz; j++) {
976             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
977             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
978             arrt += bs2;
979           }
980           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
981         }
982 
983         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
984         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
985 
986         idiag += bs2;
987         i2    += bs;
988       }
989       xb = t;
990     }
991     else xb = b;
992     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
993       idiag = kaij->ibdiag+bs2*(m-1);
994       i2    = bs * (m-1);
995       if (xb == b) {
996         for (i=m-1; i>=0; i--) {
997           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
998 
999           v  = aa + ai[i];
1000           vi = aj + ai[i];
1001           nz = diag[i] - ai[i];
1002           workt = work;
1003           for (j=0; j<nz; j++) {
1004             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1005             workt += bs;
1006           }
1007           arrt = arr;
1008           if (T) {
1009             for (j=0; j<nz; j++) {
1010               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1011               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1012               arrt += bs2;
1013             }
1014             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1015           } else if (kaij->isTI) {
1016             for (j=0; j<nz; j++) {
1017               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1018               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1019               arrt += bs2;
1020             }
1021             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1022           }
1023 
1024           v  = aa + diag[i] + 1;
1025           vi = aj + diag[i] + 1;
1026           nz = ai[i+1] - diag[i] - 1;
1027           workt = work;
1028           for (j=0; j<nz; j++) {
1029             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1030             workt += bs;
1031           }
1032           arrt = arr;
1033           if (T) {
1034             for (j=0; j<nz; j++) {
1035               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1036               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1037               arrt += bs2;
1038             }
1039             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1040           } else if (kaij->isTI) {
1041             for (j=0; j<nz; j++) {
1042               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1043               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1044               arrt += bs2;
1045             }
1046             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1047           }
1048 
1049           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1050           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1051         }
1052       } else {
1053         for (i=m-1; i>=0; i--) {
1054           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1055           v  = aa + diag[i] + 1;
1056           vi = aj + diag[i] + 1;
1057           nz = ai[i+1] - diag[i] - 1;
1058           workt = work;
1059           for (j=0; j<nz; j++) {
1060             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1061             workt += bs;
1062           }
1063           arrt = arr;
1064           if (T) {
1065             for (j=0; j<nz; j++) {
1066               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1067               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1068               arrt += bs2;
1069             }
1070             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1071           } else if (kaij->isTI) {
1072             for (j=0; j<nz; j++) {
1073               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1074               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1075               arrt += bs2;
1076             }
1077             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1078           }
1079           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1080           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1081         }
1082       }
1083       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1084     }
1085   }
1086 
1087   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1088   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1089   PetscFunctionReturn(0);
1090 }
1091 
1092 /*===================================================================================*/
1093 
1094 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1095 {
1096   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1097   PetscErrorCode ierr;
1098 
1099   PetscFunctionBegin;
1100   if (!yy) {
1101     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
1102   } else {
1103     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1104   }
1105   /* start the scatter */
1106   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1107   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
1108   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1109   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1114 {
1115   PetscErrorCode ierr;
1116   PetscFunctionBegin;
1117   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1122 {
1123   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1124   PetscErrorCode  ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 /* ----------------------------------------------------------------*/
1132 
1133 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1134 {
1135   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1136   PetscErrorCode  diag = PETSC_FALSE;
1137   PetscErrorCode  ierr;
1138   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1139   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1140 
1141   PetscFunctionBegin;
1142   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1143   b->getrowactive = PETSC_TRUE;
1144   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1145 
1146   if ((!S) && (!T) && (!b->isTI)) {
1147     if (ncols)    *ncols  = 0;
1148     if (cols)     *cols   = NULL;
1149     if (values)   *values = NULL;
1150     PetscFunctionReturn(0);
1151   }
1152 
1153   if (T || b->isTI) {
1154     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1155     c     = nzaij;
1156     for (i=0; i<nzaij; i++) {
1157       /* check if this row contains a diagonal entry */
1158       if (colsaij[i] == r) {
1159         diag = PETSC_TRUE;
1160         c = i;
1161       }
1162     }
1163   } else nzaij = c = 0;
1164 
1165   /* calculate size of row */
1166   nz = 0;
1167   if (S)            nz += q;
1168   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1169 
1170   if (cols || values) {
1171     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1172     for (i=0; i<q; i++) {
1173       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1174       v[i] = 0.0;
1175     }
1176     if (b->isTI) {
1177       for (i=0; i<nzaij; i++) {
1178         for (j=0; j<q; j++) {
1179           idx[i*q+j] = colsaij[i]*q+j;
1180           v[i*q+j]   = (j==s ? vaij[i] : 0);
1181         }
1182       }
1183     } else if (T) {
1184       for (i=0; i<nzaij; i++) {
1185         for (j=0; j<q; j++) {
1186           idx[i*q+j] = colsaij[i]*q+j;
1187           v[i*q+j]   = vaij[i]*T[s+j*p];
1188         }
1189       }
1190     }
1191     if (S) {
1192       for (j=0; j<q; j++) {
1193         idx[c*q+j] = r*q+j;
1194         v[c*q+j]  += S[s+j*p];
1195       }
1196     }
1197   }
1198 
1199   if (ncols)    *ncols  = nz;
1200   if (cols)     *cols   = idx;
1201   if (values)   *values = v;
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1206 {
1207   PetscErrorCode ierr;
1208 
1209   PetscFunctionBegin;
1210   if (nz) *nz = 0;
1211   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1212   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1213   PetscFunctionReturn(0);
1214 }
1215 
1216 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1217 {
1218   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1219   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1220   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1221   Mat             AIJ     = b->A;
1222   PetscBool       diag    = PETSC_FALSE;
1223   PetscErrorCode  ierr;
1224   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1225   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1226   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1227 
1228   PetscFunctionBegin;
1229   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1230   b->getrowactive = PETSC_TRUE;
1231   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1232   lrow = row - rstart;
1233 
1234   if ((!S) && (!T) && (!b->isTI)) {
1235     if (ncols)    *ncols  = 0;
1236     if (cols)     *cols   = NULL;
1237     if (values)   *values = NULL;
1238     PetscFunctionReturn(0);
1239   }
1240 
1241   r = lrow/p;
1242   s = lrow%p;
1243 
1244   if (T || b->isTI) {
1245     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr);
1246     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1247     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1248 
1249     c     = ncolsaij + ncolsoaij;
1250     for (i=0; i<ncolsaij; i++) {
1251       /* check if this row contains a diagonal entry */
1252       if (colsaij[i] == r) {
1253         diag = PETSC_TRUE;
1254         c = i;
1255       }
1256     }
1257   } else c = 0;
1258 
1259   /* calculate size of row */
1260   nz = 0;
1261   if (S)            nz += q;
1262   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1263 
1264   if (cols || values) {
1265     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1266     for (i=0; i<q; i++) {
1267       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1268       v[i] = 0.0;
1269     }
1270     if (b->isTI) {
1271       for (i=0; i<ncolsaij; i++) {
1272         for (j=0; j<q; j++) {
1273           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1274           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1275         }
1276       }
1277       for (i=0; i<ncolsoaij; i++) {
1278         for (j=0; j<q; j++) {
1279           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1280           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1281         }
1282       }
1283     } else if (T) {
1284       for (i=0; i<ncolsaij; i++) {
1285         for (j=0; j<q; j++) {
1286           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1287           v[i*q+j]   = vals[i]*T[s+j*p];
1288         }
1289       }
1290       for (i=0; i<ncolsoaij; i++) {
1291         for (j=0; j<q; j++) {
1292           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1293           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1294         }
1295       }
1296     }
1297     if (S) {
1298       for (j=0; j<q; j++) {
1299         idx[c*q+j] = (r+rstart/p)*q+j;
1300         v[c*q+j]  += S[s+j*p];
1301       }
1302     }
1303   }
1304 
1305   if (ncols)  *ncols  = nz;
1306   if (cols)   *cols   = idx;
1307   if (values) *values = v;
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1312 {
1313   PetscErrorCode ierr;
1314   PetscFunctionBegin;
1315   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1316   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1321 {
1322   PetscErrorCode ierr;
1323   Mat            A;
1324 
1325   PetscFunctionBegin;
1326   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1327   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1328   ierr = MatDestroy(&A);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 /* ---------------------------------------------------------------------------------- */
1333 /*@C
1334   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1335 
1336     [I \otimes S + A \otimes T]
1337 
1338   where
1339     S is a dense (p \times q) matrix
1340     T is a dense (p \times q) matrix
1341     A is an AIJ  (n \times n) matrix
1342     I is the identity matrix
1343   The resulting matrix is (np \times nq)
1344 
1345   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1346 
1347   Collective
1348 
1349   Input Parameters:
1350 + A - the AIJ matrix
1351 . p - number of rows in S and T
1352 . q - number of columns in S and T
1353 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1354 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1355 
1356   Output Parameter:
1357 . kaij - the new KAIJ matrix
1358 
1359   Notes:
1360   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1361   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1362 
1363   Level: advanced
1364 
1365 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1366 @*/
1367 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1368 {
1369   PetscErrorCode ierr;
1370   PetscMPIInt    size;
1371 
1372   PetscFunctionBegin;
1373   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1374   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1375   if (size == 1) {
1376     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1377   } else {
1378     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1379   }
1380   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1381   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1382   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1383   ierr = MatSetUp(*kaij);CHKERRQ(ierr);
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 /*MC
1388   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1389     [I \otimes S + A \otimes T],
1390   where
1391     S is a dense (p \times q) matrix,
1392     T is a dense (p \times q) matrix,
1393     A is an AIJ  (n \times n) matrix,
1394     and I is the identity matrix.
1395   The resulting matrix is (np \times nq).
1396 
1397   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1398 
1399   Notes:
1400   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1401   where x and b are column vectors containing the row-major representations of X and B.
1402 
1403   Level: advanced
1404 
1405 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1406 M*/
1407 
1408 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1409 {
1410   PetscErrorCode ierr;
1411   Mat_MPIKAIJ    *b;
1412   PetscMPIInt    size;
1413 
1414   PetscFunctionBegin;
1415   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1416   A->data  = (void*)b;
1417 
1418   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1419 
1420   A->ops->setup = MatSetUp_KAIJ;
1421 
1422   b->w    = NULL;
1423   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1424   if (size == 1) {
1425     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1426     A->ops->setup               = MatSetUp_KAIJ;
1427     A->ops->destroy             = MatDestroy_SeqKAIJ;
1428     A->ops->view                = MatView_KAIJ;
1429     A->ops->mult                = MatMult_SeqKAIJ;
1430     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1431     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1432     A->ops->getrow              = MatGetRow_SeqKAIJ;
1433     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1434     A->ops->sor                 = MatSOR_SeqKAIJ;
1435   } else {
1436     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1437     A->ops->setup               = MatSetUp_KAIJ;
1438     A->ops->destroy             = MatDestroy_MPIKAIJ;
1439     A->ops->view                = MatView_KAIJ;
1440     A->ops->mult                = MatMult_MPIKAIJ;
1441     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1442     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1443     A->ops->getrow              = MatGetRow_MPIKAIJ;
1444     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1445     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1446   }
1447   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1448   PetscFunctionReturn(0);
1449 }
1450