xref: /petsc/src/mat/impls/kaij/kaij.c (revision 28b400f66ebc7ae0049166a2294dfcd3df27e64b)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscBool      ismpikaij,isseqkaij;
50 
51   PetscFunctionBegin;
52   CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
53   CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij));
54   if (ismpikaij) {
55     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
56 
57     *B = b->A;
58   } else if (isseqkaij) {
59     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
60 
61     *B = b->AIJ;
62   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
63   PetscFunctionReturn(0);
64 }
65 
66 /*@C
67    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
68 
69    Not Collective; the entire S is stored and returned independently on all processes.
70 
71    Input Parameter:
72 .  A - the KAIJ matrix
73 
74    Output Parameters:
75 +  m - the number of rows in S
76 .  n - the number of columns in S
77 -  S - the S matrix, in form of a scalar array in column-major format
78 
79    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
80 
81    Level: advanced
82 
83 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
84 @*/
85 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
86 {
87   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
88   PetscFunctionBegin;
89   if (m) *m = b->p;
90   if (n) *n = b->q;
91   if (S) *S = b->S;
92   PetscFunctionReturn(0);
93 }
94 
95 /*@C
96    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
97 
98    Not Collective; the entire S is stored and returned independently on all processes.
99 
100    Input Parameter:
101 .  A - the KAIJ matrix
102 
103    Output Parameters:
104 +  m - the number of rows in S
105 .  n - the number of columns in S
106 -  S - the S matrix, in form of a scalar array in column-major format
107 
108    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
109 
110    Level: advanced
111 
112 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
113 @*/
114 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
115 {
116   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
117   PetscFunctionBegin;
118   if (m) *m = b->p;
119   if (n) *n = b->q;
120   if (S) *S = b->S;
121   PetscFunctionReturn(0);
122 }
123 
124 /*@C
125   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
126 
127   Not collective
128 
129   Input Parameter:
130 . A - the KAIJ matrix
131 
132   Output Parameter:
133 . S - location of pointer to array obtained with MatKAIJGetS()
134 
135   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
136   If NULL is passed, it will not attempt to zero the array pointer.
137 
138   Level: advanced
139 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
140 @*/
141 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
142 {
143   PetscFunctionBegin;
144   if (S) *S = NULL;
145   CHKERRQ(PetscObjectStateIncrease((PetscObject)A));
146   PetscFunctionReturn(0);
147 }
148 
149 /*@C
150   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
151 
152   Not collective
153 
154   Input Parameter:
155 . A - the KAIJ matrix
156 
157   Output Parameter:
158 . S - location of pointer to array obtained with MatKAIJGetS()
159 
160   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
161   If NULL is passed, it will not attempt to zero the array pointer.
162 
163   Level: advanced
164 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
165 @*/
166 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
167 {
168   PetscFunctionBegin;
169   if (S) *S = NULL;
170   PetscFunctionReturn(0);
171 }
172 
173 /*@C
174    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
175 
176    Not Collective; the entire T is stored and returned independently on all processes
177 
178    Input Parameter:
179 .  A - the KAIJ matrix
180 
181    Output Parameters:
182 +  m - the number of rows in T
183 .  n - the number of columns in T
184 -  T - the T matrix, in form of a scalar array in column-major format
185 
186    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
187 
188    Level: advanced
189 
190 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
191 @*/
192 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
193 {
194   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
195   PetscFunctionBegin;
196   if (m) *m = b->p;
197   if (n) *n = b->q;
198   if (T) *T = b->T;
199   PetscFunctionReturn(0);
200 }
201 
202 /*@C
203    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
204 
205    Not Collective; the entire T is stored and returned independently on all processes
206 
207    Input Parameter:
208 .  A - the KAIJ matrix
209 
210    Output Parameters:
211 +  m - the number of rows in T
212 .  n - the number of columns in T
213 -  T - the T matrix, in form of a scalar array in column-major format
214 
215    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
216 
217    Level: advanced
218 
219 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
220 @*/
221 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
222 {
223   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
224   PetscFunctionBegin;
225   if (m) *m = b->p;
226   if (n) *n = b->q;
227   if (T) *T = b->T;
228   PetscFunctionReturn(0);
229 }
230 
231 /*@C
232   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
233 
234   Not collective
235 
236   Input Parameter:
237 . A - the KAIJ matrix
238 
239   Output Parameter:
240 . T - location of pointer to array obtained with MatKAIJGetS()
241 
242   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
243   If NULL is passed, it will not attempt to zero the array pointer.
244 
245   Level: advanced
246 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
247 @*/
248 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
249 {
250   PetscFunctionBegin;
251   if (T) *T = NULL;
252   CHKERRQ(PetscObjectStateIncrease((PetscObject)A));
253   PetscFunctionReturn(0);
254 }
255 
256 /*@C
257   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
258 
259   Not collective
260 
261   Input Parameter:
262 . A - the KAIJ matrix
263 
264   Output Parameter:
265 . T - location of pointer to array obtained with MatKAIJGetS()
266 
267   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
268   If NULL is passed, it will not attempt to zero the array pointer.
269 
270   Level: advanced
271 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
272 @*/
273 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
274 {
275   PetscFunctionBegin;
276   if (T) *T = NULL;
277   PetscFunctionReturn(0);
278 }
279 
280 /*@
281    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
282 
283    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
284 
285    Input Parameters:
286 +  A - the KAIJ matrix
287 -  B - the AIJ matrix
288 
289    Notes:
290    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
291    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
292 
293    Level: advanced
294 
295 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
296 @*/
297 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
298 {
299   PetscMPIInt    size;
300   PetscBool      flg;
301 
302   PetscFunctionBegin;
303   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
304   if (size == 1) {
305     CHKERRQ(PetscObjectTypeCompare((PetscObject)B,MATSEQAIJ,&flg));
306     PetscCheck(flg,PetscObjectComm((PetscObject)B),PETSC_ERR_SUP,"MatKAIJSetAIJ() with MATSEQKAIJ does not support %s as the AIJ mat",((PetscObject)B)->type_name);
307     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
308     a->AIJ = B;
309   } else {
310     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
311     a->A = B;
312   }
313   CHKERRQ(PetscObjectReference((PetscObject)B));
314   PetscFunctionReturn(0);
315 }
316 
317 /*@C
318    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
319 
320    Logically Collective; the entire S is stored independently on all processes.
321 
322    Input Parameters:
323 +  A - the KAIJ matrix
324 .  p - the number of rows in S
325 .  q - the number of columns in S
326 -  S - the S matrix, in form of a scalar array in column-major format
327 
328    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
329    The S matrix is copied, so the user can destroy this array.
330 
331    Level: Advanced
332 
333 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
334 @*/
335 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
336 {
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   CHKERRQ(PetscFree(a->S));
341   if (S) {
342     CHKERRQ(PetscMalloc1(p*q,&a->S));
343     CHKERRQ(PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar)));
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
353 
354    Logically Collective.
355 
356    Input Parameter:
357 .  A - the KAIJ matrix
358 
359   Output Parameter:
360 .  identity - the Boolean value
361 
362    Level: Advanced
363 
364 .seealso: MatKAIJGetS(), MatKAIJGetT()
365 @*/
366 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
367 {
368   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
369   PetscInt    i,j;
370 
371   PetscFunctionBegin;
372   if (a->p != a->q) {
373     *identity = PETSC_FALSE;
374     PetscFunctionReturn(0);
375   } else *identity = PETSC_TRUE;
376   if (!a->isTI || a->S) {
377     for (i=0; i<a->p && *identity; i++) {
378       for (j=0; j<a->p && *identity; j++) {
379         if (i != j) {
380           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
381           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
382         } else {
383           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
384           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
385         }
386       }
387     }
388   }
389   PetscFunctionReturn(0);
390 }
391 
392 /*@C
393    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
394 
395    Logically Collective; the entire T is stored independently on all processes.
396 
397    Input Parameters:
398 +  A - the KAIJ matrix
399 .  p - the number of rows in S
400 .  q - the number of columns in S
401 -  T - the T matrix, in form of a scalar array in column-major format
402 
403    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
404    The T matrix is copied, so the user can destroy this array.
405 
406    Level: Advanced
407 
408 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
409 @*/
410 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
411 {
412   PetscInt       i,j;
413   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
414   PetscBool      isTI = PETSC_FALSE;
415 
416   PetscFunctionBegin;
417   /* check if T is an identity matrix */
418   if (T && (p == q)) {
419     isTI = PETSC_TRUE;
420     for (i=0; i<p; i++) {
421       for (j=0; j<q; j++) {
422         if (i == j) {
423           /* diagonal term must be 1 */
424           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
425         } else {
426           /* off-diagonal term must be 0 */
427           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
428         }
429       }
430     }
431   }
432   a->isTI = isTI;
433 
434   CHKERRQ(PetscFree(a->T));
435   if (T && (!isTI)) {
436     CHKERRQ(PetscMalloc1(p*q,&a->T));
437     CHKERRQ(PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar)));
438   } else a->T = NULL;
439 
440   a->p = p;
441   a->q = q;
442   PetscFunctionReturn(0);
443 }
444 
445 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
446 {
447   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
448 
449   PetscFunctionBegin;
450   CHKERRQ(MatDestroy(&b->AIJ));
451   CHKERRQ(PetscFree(b->S));
452   CHKERRQ(PetscFree(b->T));
453   CHKERRQ(PetscFree(b->ibdiag));
454   CHKERRQ(PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr));
455   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",NULL));
456   CHKERRQ(PetscFree(A->data));
457   PetscFunctionReturn(0);
458 }
459 
460 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
461 {
462   Mat_MPIKAIJ      *a;
463   Mat_MPIAIJ       *mpiaij;
464   PetscScalar      *T;
465   PetscInt         i,j;
466   PetscObjectState state;
467 
468   PetscFunctionBegin;
469   a = (Mat_MPIKAIJ*)A->data;
470   mpiaij = (Mat_MPIAIJ*)a->A->data;
471 
472   CHKERRQ(PetscObjectStateGet((PetscObject)a->A,&state));
473   if (state == a->state) {
474     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
475     PetscFunctionReturn(0);
476   } else {
477     CHKERRQ(MatDestroy(&a->AIJ));
478     CHKERRQ(MatDestroy(&a->OAIJ));
479     if (a->isTI) {
480       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
481        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
482        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
483        * to pass in. */
484       CHKERRQ(PetscMalloc1(a->p*a->q,&T));
485       for (i=0; i<a->p; i++) {
486         for (j=0; j<a->q; j++) {
487           if (i==j) T[i+j*a->p] = 1.0;
488           else      T[i+j*a->p] = 0.0;
489         }
490       }
491     } else T = a->T;
492     CHKERRQ(MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ));
493     CHKERRQ(MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ));
494     if (a->isTI) {
495       CHKERRQ(PetscFree(T));
496     }
497     a->state = state;
498   }
499 
500   PetscFunctionReturn(0);
501 }
502 
503 PetscErrorCode MatSetUp_KAIJ(Mat A)
504 {
505   PetscInt       n;
506   PetscMPIInt    size;
507   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
508 
509   PetscFunctionBegin;
510   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
511   if (size == 1) {
512     CHKERRQ(MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N));
513     CHKERRQ(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
514     CHKERRQ(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
515     CHKERRQ(PetscLayoutSetUp(A->rmap));
516     CHKERRQ(PetscLayoutSetUp(A->cmap));
517   } else {
518     Mat_MPIKAIJ *a;
519     Mat_MPIAIJ  *mpiaij;
520     IS          from,to;
521     Vec         gvec;
522 
523     a = (Mat_MPIKAIJ*)A->data;
524     mpiaij = (Mat_MPIAIJ*)a->A->data;
525     CHKERRQ(MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N));
526     CHKERRQ(PetscLayoutSetBlockSize(A->rmap,seqkaij->p));
527     CHKERRQ(PetscLayoutSetBlockSize(A->cmap,seqkaij->q));
528     CHKERRQ(PetscLayoutSetUp(A->rmap));
529     CHKERRQ(PetscLayoutSetUp(A->cmap));
530 
531     CHKERRQ(MatKAIJ_build_AIJ_OAIJ(A));
532 
533     CHKERRQ(VecGetSize(mpiaij->lvec,&n));
534     CHKERRQ(VecCreate(PETSC_COMM_SELF,&a->w));
535     CHKERRQ(VecSetSizes(a->w,n*a->q,n*a->q));
536     CHKERRQ(VecSetBlockSize(a->w,a->q));
537     CHKERRQ(VecSetType(a->w,VECSEQ));
538 
539     /* create two temporary Index sets for build scatter gather */
540     CHKERRQ(ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from));
541     CHKERRQ(ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to));
542 
543     /* create temporary global vector to generate scatter context */
544     CHKERRQ(VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec));
545 
546     /* generate the scatter context */
547     CHKERRQ(VecScatterCreate(gvec,from,a->w,to,&a->ctx));
548 
549     CHKERRQ(ISDestroy(&from));
550     CHKERRQ(ISDestroy(&to));
551     CHKERRQ(VecDestroy(&gvec));
552   }
553 
554   A->assembled = PETSC_TRUE;
555   PetscFunctionReturn(0);
556 }
557 
558 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
559 {
560   PetscViewerFormat format;
561   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
562   Mat               B;
563   PetscInt          i;
564   PetscBool         ismpikaij;
565 
566   PetscFunctionBegin;
567   CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
568   CHKERRQ(PetscViewerGetFormat(viewer,&format));
569   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
570     CHKERRQ(PetscViewerASCIIPrintf(viewer,"S and T have %" PetscInt_FMT " rows and %" PetscInt_FMT " columns\n",a->p,a->q));
571 
572     /* Print appropriate details for S. */
573     if (!a->S) {
574       CHKERRQ(PetscViewerASCIIPrintf(viewer,"S is NULL\n"));
575     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
576       CHKERRQ(PetscViewerASCIIPrintf(viewer,"Entries of S are "));
577       for (i=0; i<(a->p * a->q); i++) {
578 #if defined(PETSC_USE_COMPLEX)
579         CHKERRQ(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i])));
580 #else
581         CHKERRQ(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i])));
582 #endif
583       }
584       CHKERRQ(PetscViewerASCIIPrintf(viewer,"\n"));
585     }
586 
587     /* Print appropriate details for T. */
588     if (a->isTI) {
589       CHKERRQ(PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n"));
590     } else if (!a->T) {
591       CHKERRQ(PetscViewerASCIIPrintf(viewer,"T is NULL\n"));
592     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
593       CHKERRQ(PetscViewerASCIIPrintf(viewer,"Entries of T are "));
594       for (i=0; i<(a->p * a->q); i++) {
595 #if defined(PETSC_USE_COMPLEX)
596         CHKERRQ(PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i])));
597 #else
598         CHKERRQ(PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i])));
599 #endif
600       }
601       CHKERRQ(PetscViewerASCIIPrintf(viewer,"\n"));
602     }
603 
604     /* Now print details for the AIJ matrix, using the AIJ viewer. */
605     CHKERRQ(PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n"));
606     if (ismpikaij) {
607       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
608       CHKERRQ(MatView(b->A,viewer));
609     } else {
610       CHKERRQ(MatView(a->AIJ,viewer));
611     }
612 
613   } else {
614     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
615     CHKERRQ(MatConvert(A,MATAIJ,MAT_INITIAL_MATRIX,&B));
616     CHKERRQ(MatView(B,viewer));
617     CHKERRQ(MatDestroy(&B));
618   }
619   PetscFunctionReturn(0);
620 }
621 
622 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
623 {
624   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
625 
626   PetscFunctionBegin;
627   CHKERRQ(MatDestroy(&b->AIJ));
628   CHKERRQ(MatDestroy(&b->OAIJ));
629   CHKERRQ(MatDestroy(&b->A));
630   CHKERRQ(VecScatterDestroy(&b->ctx));
631   CHKERRQ(VecDestroy(&b->w));
632   CHKERRQ(PetscFree(b->S));
633   CHKERRQ(PetscFree(b->T));
634   CHKERRQ(PetscFree(b->ibdiag));
635   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",NULL));
636   CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",NULL));
637   CHKERRQ(PetscFree(A->data));
638   PetscFunctionReturn(0);
639 }
640 
641 /* --------------------------------------------------------------------------------------*/
642 
643 /* zz = yy + Axx */
644 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
645 {
646   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
647   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
648   const PetscScalar *s = b->S, *t = b->T;
649   const PetscScalar *x,*v,*bx;
650   PetscScalar       *y,*sums;
651   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
652   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
653 
654   PetscFunctionBegin;
655   if (!yy) {
656     CHKERRQ(VecSet(zz,0.0));
657   } else {
658     CHKERRQ(VecCopy(yy,zz));
659   }
660   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
661 
662   CHKERRQ(VecGetArrayRead(xx,&x));
663   CHKERRQ(VecGetArray(zz,&y));
664   idx  = a->j;
665   v    = a->a;
666   ii   = a->i;
667 
668   if (b->isTI) {
669     for (i=0; i<m; i++) {
670       jrow = ii[i];
671       n    = ii[i+1] - jrow;
672       sums = y + p*i;
673       for (j=0; j<n; j++) {
674         for (k=0; k<p; k++) {
675           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
676         }
677       }
678     }
679     CHKERRQ(PetscLogFlops(3.0*(a->nz)*p));
680   } else if (t) {
681     for (i=0; i<m; i++) {
682       jrow = ii[i];
683       n    = ii[i+1] - jrow;
684       sums = y + p*i;
685       for (j=0; j<n; j++) {
686         for (k=0; k<p; k++) {
687           for (l=0; l<q; l++) {
688             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
689           }
690         }
691       }
692     }
693     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
694      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
695      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
696      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
697     CHKERRQ(PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz));
698   }
699   if (s) {
700     for (i=0; i<m; i++) {
701       sums = y + p*i;
702       bx   = x + q*i;
703       if (i < b->AIJ->cmap->n) {
704         for (j=0; j<q; j++) {
705           for (k=0; k<p; k++) {
706             sums[k] += s[k+j*p]*bx[j];
707           }
708         }
709       }
710     }
711     CHKERRQ(PetscLogFlops(2.0*m*p*q));
712   }
713 
714   CHKERRQ(VecRestoreArrayRead(xx,&x));
715   CHKERRQ(VecRestoreArray(zz,&y));
716   PetscFunctionReturn(0);
717 }
718 
719 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
720 {
721   PetscFunctionBegin;
722   CHKERRQ(MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy));
723   PetscFunctionReturn(0);
724 }
725 
726 #include <petsc/private/kernels/blockinvert.h>
727 
728 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
729 {
730   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
731   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
732   const PetscScalar *S  = b->S;
733   const PetscScalar *T  = b->T;
734   const PetscScalar *v  = a->a;
735   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
736   PetscInt          i,j,*v_pivots,dof,dof2;
737   PetscScalar       *diag,aval,*v_work;
738 
739   PetscFunctionBegin;
740   PetscCheckFalse(p != q,PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
741   PetscCheckFalse((!S) && (!T) && (!b->isTI),PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
742 
743   dof  = p;
744   dof2 = dof*dof;
745 
746   if (b->ibdiagvalid) {
747     if (values) *values = b->ibdiag;
748     PetscFunctionReturn(0);
749   }
750   if (!b->ibdiag) {
751     CHKERRQ(PetscMalloc1(dof2*m,&b->ibdiag));
752     CHKERRQ(PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar)));
753   }
754   if (values) *values = b->ibdiag;
755   diag = b->ibdiag;
756 
757   CHKERRQ(PetscMalloc2(dof,&v_work,dof,&v_pivots));
758   for (i=0; i<m; i++) {
759     if (S) {
760       CHKERRQ(PetscMemcpy(diag,S,dof2*sizeof(PetscScalar)));
761     } else {
762       CHKERRQ(PetscMemzero(diag,dof2*sizeof(PetscScalar)));
763     }
764     if (b->isTI) {
765       aval = 0;
766       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
767       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
768     } else if (T) {
769       aval = 0;
770       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
771       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
772     }
773     CHKERRQ(PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL));
774     diag += dof2;
775   }
776   CHKERRQ(PetscFree2(v_work,v_pivots));
777 
778   b->ibdiagvalid = PETSC_TRUE;
779   PetscFunctionReturn(0);
780 }
781 
782 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
783 {
784   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
785 
786   PetscFunctionBegin;
787   *B = kaij->AIJ;
788   PetscFunctionReturn(0);
789 }
790 
791 static PetscErrorCode MatConvert_KAIJ_AIJ(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
792 {
793   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
794   Mat            AIJ,OAIJ,B;
795   PetscInt       *d_nnz,*o_nnz = NULL,nz,i,j,m,d;
796   const PetscInt p = a->p,q = a->q;
797   PetscBool      ismpikaij,missing;
798 
799   PetscFunctionBegin;
800   if (reuse != MAT_REUSE_MATRIX) {
801     CHKERRQ(PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij));
802     if (ismpikaij) {
803       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
804       AIJ = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
805       OAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
806     } else {
807       AIJ = a->AIJ;
808       OAIJ = NULL;
809     }
810     CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),&B));
811     CHKERRQ(MatSetSizes(B,A->rmap->n,A->cmap->n,A->rmap->N,A->cmap->N));
812     CHKERRQ(MatSetType(B,MATAIJ));
813     CHKERRQ(MatGetSize(AIJ,&m,NULL));
814     CHKERRQ(MatMissingDiagonal(AIJ,&missing,&d)); /* assumption that all successive rows will have a missing diagonal */
815     if (!missing || !a->S) d = m;
816     CHKERRQ(PetscMalloc1(m*p,&d_nnz));
817     for (i = 0; i < m; ++i) {
818       CHKERRQ(MatGetRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
819       for (j = 0; j < p; ++j) d_nnz[i*p + j] = nz*q + (i >= d)*q;
820       CHKERRQ(MatRestoreRow_SeqAIJ(AIJ,i,&nz,NULL,NULL));
821     }
822     if (OAIJ) {
823       CHKERRQ(PetscMalloc1(m*p,&o_nnz));
824       for (i = 0; i < m; ++i) {
825         CHKERRQ(MatGetRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
826         for (j = 0; j < p; ++j) o_nnz[i*p + j] = nz*q;
827         CHKERRQ(MatRestoreRow_SeqAIJ(OAIJ,i,&nz,NULL,NULL));
828       }
829       CHKERRQ(MatMPIAIJSetPreallocation(B,0,d_nnz,0,o_nnz));
830     } else {
831       CHKERRQ(MatSeqAIJSetPreallocation(B,0,d_nnz));
832     }
833     CHKERRQ(PetscFree(d_nnz));
834     CHKERRQ(PetscFree(o_nnz));
835   } else B = *newmat;
836   CHKERRQ(MatConvert_Basic(A,newtype,MAT_REUSE_MATRIX,&B));
837   if (reuse == MAT_INPLACE_MATRIX) {
838     CHKERRQ(MatHeaderReplace(A,&B));
839   } else *newmat = B;
840   PetscFunctionReturn(0);
841 }
842 
843 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
844 {
845   PetscErrorCode    ierr;
846   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
847   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
848   const PetscScalar *aa = a->a, *T = kaij->T, *v;
849   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
850   const PetscScalar *b, *xb, *idiag;
851   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
852   PetscInt          i, j, k, i2, bs, bs2, nz;
853 
854   PetscFunctionBegin;
855   its = its*lits;
856   PetscCheckFalse(flag & SOR_EISENSTAT,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
857   PetscCheckFalse(its <= 0,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %" PetscInt_FMT " and local its %" PetscInt_FMT " both positive",its,lits);
858   PetscCheck(!fshift,PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
859   PetscCheckFalse((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER),PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
860   PetscCheckFalse(p != q,PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
861   else        {bs = p; bs2 = bs*bs; }
862 
863   if (!m) PetscFunctionReturn(0);
864 
865   if (!kaij->ibdiagvalid) CHKERRQ(MatInvertBlockDiagonal_SeqKAIJ(A,NULL));
866   idiag = kaij->ibdiag;
867   diag  = a->diag;
868 
869   if (!kaij->sor.setup) {
870     CHKERRQ(PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr));
871     kaij->sor.setup = PETSC_TRUE;
872   }
873   y     = kaij->sor.y;
874   w     = kaij->sor.w;
875   work  = kaij->sor.work;
876   t     = kaij->sor.t;
877   arr   = kaij->sor.arr;
878 
879   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
880   CHKERRQ(VecGetArrayRead(bb,&b));
881 
882   if (flag & SOR_ZERO_INITIAL_GUESS) {
883     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
884       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
885       CHKERRQ(PetscMemcpy(t,b,bs*sizeof(PetscScalar)));
886       i2     =  bs;
887       idiag  += bs2;
888       for (i=1; i<m; i++) {
889         v  = aa + ai[i];
890         vi = aj + ai[i];
891         nz = diag[i] - ai[i];
892 
893         if (T) {                /* b - T (Arow * x) */
894           CHKERRQ(PetscMemzero(w,bs*sizeof(PetscScalar)));
895           for (j=0; j<nz; j++) {
896             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
897           }
898           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
899           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
900         } else if (kaij->isTI) {
901           CHKERRQ(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
902           for (j=0; j<nz; j++) {
903             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
904           }
905         } else {
906           CHKERRQ(PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar)));
907         }
908 
909         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
910         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
911 
912         idiag += bs2;
913         i2    += bs;
914       }
915       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
916       CHKERRQ(PetscLogFlops(1.0*bs2*a->nz));
917       xb = t;
918     } else xb = b;
919     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
920       idiag = kaij->ibdiag+bs2*(m-1);
921       i2    = bs * (m-1);
922       CHKERRQ(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
923       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
924       i2    -= bs;
925       idiag -= bs2;
926       for (i=m-2; i>=0; i--) {
927         v  = aa + diag[i] + 1 ;
928         vi = aj + diag[i] + 1;
929         nz = ai[i+1] - diag[i] - 1;
930 
931         if (T) {                /* FIXME: This branch untested */
932           CHKERRQ(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
933           /* copy all rows of x that are needed into contiguous space */
934           workt = work;
935           for (j=0; j<nz; j++) {
936             CHKERRQ(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
937             workt += bs;
938           }
939           arrt = arr;
940           for (j=0; j<nz; j++) {
941             CHKERRQ(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
942             for (k=0; k<bs2; k++) arrt[k] *= v[j];
943             arrt += bs2;
944           }
945           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
946         } else if (kaij->isTI) {
947           CHKERRQ(PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar)));
948           for (j=0; j<nz; j++) {
949             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
950           }
951         }
952 
953         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
954         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
955 
956         idiag -= bs2;
957         i2    -= bs;
958       }
959       CHKERRQ(PetscLogFlops(1.0*bs2*(a->nz)));
960     }
961     its--;
962   }
963   while (its--) {               /* FIXME: This branch not updated */
964     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
965       i2     =  0;
966       idiag  = kaij->ibdiag;
967       for (i=0; i<m; i++) {
968         CHKERRQ(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));
969 
970         v  = aa + ai[i];
971         vi = aj + ai[i];
972         nz = diag[i] - ai[i];
973         workt = work;
974         for (j=0; j<nz; j++) {
975           CHKERRQ(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
976           workt += bs;
977         }
978         arrt = arr;
979         if (T) {
980           for (j=0; j<nz; j++) {
981             CHKERRQ(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
982             for (k=0; k<bs2; k++) arrt[k] *= v[j];
983             arrt += bs2;
984           }
985           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
986         } else if (kaij->isTI) {
987           for (j=0; j<nz; j++) {
988             CHKERRQ(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
989             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
990             arrt += bs2;
991           }
992           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
993         }
994         CHKERRQ(PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar)));
995 
996         v  = aa + diag[i] + 1;
997         vi = aj + diag[i] + 1;
998         nz = ai[i+1] - diag[i] - 1;
999         workt = work;
1000         for (j=0; j<nz; j++) {
1001           CHKERRQ(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1002           workt += bs;
1003         }
1004         arrt = arr;
1005         if (T) {
1006           for (j=0; j<nz; j++) {
1007             CHKERRQ(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1008             for (k=0; k<bs2; k++) arrt[k] *= v[j];
1009             arrt += bs2;
1010           }
1011           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1012         } else if (kaij->isTI) {
1013           for (j=0; j<nz; j++) {
1014             CHKERRQ(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1015             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1016             arrt += bs2;
1017           }
1018           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1019         }
1020 
1021         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1022         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1023 
1024         idiag += bs2;
1025         i2    += bs;
1026       }
1027       xb = t;
1028     }
1029     else xb = b;
1030     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1031       idiag = kaij->ibdiag+bs2*(m-1);
1032       i2    = bs * (m-1);
1033       if (xb == b) {
1034         for (i=m-1; i>=0; i--) {
1035           CHKERRQ(PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar)));
1036 
1037           v  = aa + ai[i];
1038           vi = aj + ai[i];
1039           nz = diag[i] - ai[i];
1040           workt = work;
1041           for (j=0; j<nz; j++) {
1042             CHKERRQ(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1043             workt += bs;
1044           }
1045           arrt = arr;
1046           if (T) {
1047             for (j=0; j<nz; j++) {
1048               CHKERRQ(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1049               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1050               arrt += bs2;
1051             }
1052             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1053           } else if (kaij->isTI) {
1054             for (j=0; j<nz; j++) {
1055               CHKERRQ(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1056               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1057               arrt += bs2;
1058             }
1059             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1060           }
1061 
1062           v  = aa + diag[i] + 1;
1063           vi = aj + diag[i] + 1;
1064           nz = ai[i+1] - diag[i] - 1;
1065           workt = work;
1066           for (j=0; j<nz; j++) {
1067             CHKERRQ(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1068             workt += bs;
1069           }
1070           arrt = arr;
1071           if (T) {
1072             for (j=0; j<nz; j++) {
1073               CHKERRQ(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1074               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1075               arrt += bs2;
1076             }
1077             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1078           } else if (kaij->isTI) {
1079             for (j=0; j<nz; j++) {
1080               CHKERRQ(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1081               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1082               arrt += bs2;
1083             }
1084             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1085           }
1086 
1087           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1088           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1089         }
1090       } else {
1091         for (i=m-1; i>=0; i--) {
1092           CHKERRQ(PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar)));
1093           v  = aa + diag[i] + 1;
1094           vi = aj + diag[i] + 1;
1095           nz = ai[i+1] - diag[i] - 1;
1096           workt = work;
1097           for (j=0; j<nz; j++) {
1098             CHKERRQ(PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar)));
1099             workt += bs;
1100           }
1101           arrt = arr;
1102           if (T) {
1103             for (j=0; j<nz; j++) {
1104               CHKERRQ(PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar)));
1105               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1106               arrt += bs2;
1107             }
1108             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1109           } else if (kaij->isTI) {
1110             for (j=0; j<nz; j++) {
1111               CHKERRQ(PetscMemzero(arrt,bs2*sizeof(PetscScalar)));
1112               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1113               arrt += bs2;
1114             }
1115             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1116           }
1117           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1118           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1119         }
1120       }
1121       CHKERRQ(PetscLogFlops(1.0*bs2*(a->nz)));
1122     }
1123   }
1124 
1125   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1126   CHKERRQ(VecRestoreArrayRead(bb,&b));
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 /*===================================================================================*/
1131 
1132 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1133 {
1134   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1135 
1136   PetscFunctionBegin;
1137   if (!yy) {
1138     CHKERRQ(VecSet(zz,0.0));
1139   } else {
1140     CHKERRQ(VecCopy(yy,zz));
1141   }
1142   CHKERRQ(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1143   /* start the scatter */
1144   CHKERRQ(VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
1145   CHKERRQ((*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz));
1146   CHKERRQ(VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD));
1147   CHKERRQ((*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz));
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1152 {
1153   PetscFunctionBegin;
1154   CHKERRQ(MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy));
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1159 {
1160   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1161 
1162   PetscFunctionBegin;
1163   CHKERRQ(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ is up to date. */
1164   CHKERRQ((*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values));
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 /* ----------------------------------------------------------------*/
1169 
1170 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1171 {
1172   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1173   PetscErrorCode  diag = PETSC_FALSE;
1174   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1175   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1176 
1177   PetscFunctionBegin;
1178   PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1179   b->getrowactive = PETSC_TRUE;
1180   PetscCheckFalse(row < 0 || row >= A->rmap->n,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %" PetscInt_FMT " out of range",row);
1181 
1182   if ((!S) && (!T) && (!b->isTI)) {
1183     if (ncols)    *ncols  = 0;
1184     if (cols)     *cols   = NULL;
1185     if (values)   *values = NULL;
1186     PetscFunctionReturn(0);
1187   }
1188 
1189   if (T || b->isTI) {
1190     CHKERRQ(MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij));
1191     c     = nzaij;
1192     for (i=0; i<nzaij; i++) {
1193       /* check if this row contains a diagonal entry */
1194       if (colsaij[i] == r) {
1195         diag = PETSC_TRUE;
1196         c = i;
1197       }
1198     }
1199   } else nzaij = c = 0;
1200 
1201   /* calculate size of row */
1202   nz = 0;
1203   if (S)            nz += q;
1204   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1205 
1206   if (cols || values) {
1207     CHKERRQ(PetscMalloc2(nz,&idx,nz,&v));
1208     for (i=0; i<q; i++) {
1209       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1210       v[i] = 0.0;
1211     }
1212     if (b->isTI) {
1213       for (i=0; i<nzaij; i++) {
1214         for (j=0; j<q; j++) {
1215           idx[i*q+j] = colsaij[i]*q+j;
1216           v[i*q+j]   = (j==s ? vaij[i] : 0);
1217         }
1218       }
1219     } else if (T) {
1220       for (i=0; i<nzaij; i++) {
1221         for (j=0; j<q; j++) {
1222           idx[i*q+j] = colsaij[i]*q+j;
1223           v[i*q+j]   = vaij[i]*T[s+j*p];
1224         }
1225       }
1226     }
1227     if (S) {
1228       for (j=0; j<q; j++) {
1229         idx[c*q+j] = r*q+j;
1230         v[c*q+j]  += S[s+j*p];
1231       }
1232     }
1233   }
1234 
1235   if (ncols)    *ncols  = nz;
1236   if (cols)     *cols   = idx;
1237   if (values)   *values = v;
1238   PetscFunctionReturn(0);
1239 }
1240 
1241 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1242 {
1243   PetscFunctionBegin;
1244   if (nz) *nz = 0;
1245   CHKERRQ(PetscFree2(*idx,*v));
1246   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1251 {
1252   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1253   Mat             AIJ     = b->A;
1254   PetscBool       diag    = PETSC_FALSE;
1255   Mat             MatAIJ,MatOAIJ;
1256   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1257   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1258   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1259 
1260   PetscFunctionBegin;
1261   CHKERRQ(MatKAIJ_build_AIJ_OAIJ(A)); /* Ensure b->AIJ and b->OAIJ are up to date. */
1262   MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1263   MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1264   PetscCheck(!b->getrowactive,PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1265   b->getrowactive = PETSC_TRUE;
1266   PetscCheckFalse(row < rstart || row >= rend,PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1267   lrow = row - rstart;
1268 
1269   if ((!S) && (!T) && (!b->isTI)) {
1270     if (ncols)    *ncols  = 0;
1271     if (cols)     *cols   = NULL;
1272     if (values)   *values = NULL;
1273     PetscFunctionReturn(0);
1274   }
1275 
1276   r = lrow/p;
1277   s = lrow%p;
1278 
1279   if (T || b->isTI) {
1280     CHKERRQ(MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray));
1281     CHKERRQ(MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals));
1282     CHKERRQ(MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals));
1283 
1284     c     = ncolsaij + ncolsoaij;
1285     for (i=0; i<ncolsaij; i++) {
1286       /* check if this row contains a diagonal entry */
1287       if (colsaij[i] == r) {
1288         diag = PETSC_TRUE;
1289         c = i;
1290       }
1291     }
1292   } else c = 0;
1293 
1294   /* calculate size of row */
1295   nz = 0;
1296   if (S)            nz += q;
1297   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1298 
1299   if (cols || values) {
1300     CHKERRQ(PetscMalloc2(nz,&idx,nz,&v));
1301     for (i=0; i<q; i++) {
1302       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1303       v[i] = 0.0;
1304     }
1305     if (b->isTI) {
1306       for (i=0; i<ncolsaij; i++) {
1307         for (j=0; j<q; j++) {
1308           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1309           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1310         }
1311       }
1312       for (i=0; i<ncolsoaij; i++) {
1313         for (j=0; j<q; j++) {
1314           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1315           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1316         }
1317       }
1318     } else if (T) {
1319       for (i=0; i<ncolsaij; i++) {
1320         for (j=0; j<q; j++) {
1321           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1322           v[i*q+j]   = vals[i]*T[s+j*p];
1323         }
1324       }
1325       for (i=0; i<ncolsoaij; i++) {
1326         for (j=0; j<q; j++) {
1327           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1328           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1329         }
1330       }
1331     }
1332     if (S) {
1333       for (j=0; j<q; j++) {
1334         idx[c*q+j] = (r+rstart/p)*q+j;
1335         v[c*q+j]  += S[s+j*p];
1336       }
1337     }
1338   }
1339 
1340   if (ncols)  *ncols  = nz;
1341   if (cols)   *cols   = idx;
1342   if (values) *values = v;
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1347 {
1348   PetscFunctionBegin;
1349   CHKERRQ(PetscFree2(*idx,*v));
1350   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1355 {
1356   Mat            A;
1357 
1358   PetscFunctionBegin;
1359   CHKERRQ(MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A));
1360   CHKERRQ(MatCreateSubMatrix(A,isrow,iscol,cll,newmat));
1361   CHKERRQ(MatDestroy(&A));
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 /* ---------------------------------------------------------------------------------- */
1366 /*@C
1367   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1368 
1369     [I \otimes S + A \otimes T]
1370 
1371   where
1372     S is a dense (p \times q) matrix
1373     T is a dense (p \times q) matrix
1374     A is an AIJ  (n \times n) matrix
1375     I is the identity matrix
1376   The resulting matrix is (np \times nq)
1377 
1378   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1379 
1380   Collective
1381 
1382   Input Parameters:
1383 + A - the AIJ matrix
1384 . p - number of rows in S and T
1385 . q - number of columns in S and T
1386 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1387 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1388 
1389   Output Parameter:
1390 . kaij - the new KAIJ matrix
1391 
1392   Notes:
1393   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1394   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1395 
1396   Developer Notes:
1397   In the MATMPIKAIJ case, the internal 'AIJ' and 'OAIJ' sequential KAIJ matrices are kept up to date by tracking the object state
1398   of the AIJ matrix 'A' that describes the blockwise action of the MATMPIKAIJ matrix and, if the object state has changed, lazily
1399   rebuilding 'AIJ' and 'OAIJ' just before executing operations with the MATMPIKAIJ matrix. If new types of operations are added,
1400   routines implementing those must also ensure these are rebuilt when needed (by calling the internal MatKAIJ_build_AIJ_OAIJ() routine).
1401 
1402   Level: advanced
1403 
1404 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1405 @*/
1406 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1407 {
1408   PetscFunctionBegin;
1409   CHKERRQ(MatCreate(PetscObjectComm((PetscObject)A),kaij));
1410   CHKERRQ(MatSetType(*kaij,MATKAIJ));
1411   CHKERRQ(MatKAIJSetAIJ(*kaij,A));
1412   CHKERRQ(MatKAIJSetS(*kaij,p,q,S));
1413   CHKERRQ(MatKAIJSetT(*kaij,p,q,T));
1414   CHKERRQ(MatSetUp(*kaij));
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 /*MC
1419   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1420     [I \otimes S + A \otimes T],
1421   where
1422     S is a dense (p \times q) matrix,
1423     T is a dense (p \times q) matrix,
1424     A is an AIJ  (n \times n) matrix,
1425     and I is the identity matrix.
1426   The resulting matrix is (np \times nq).
1427 
1428   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1429 
1430   Notes:
1431   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1432   where x and b are column vectors containing the row-major representations of X and B.
1433 
1434   Level: advanced
1435 
1436 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1437 M*/
1438 
1439 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1440 {
1441   Mat_MPIKAIJ    *b;
1442   PetscMPIInt    size;
1443 
1444   PetscFunctionBegin;
1445   CHKERRQ(PetscNewLog(A,&b));
1446   A->data  = (void*)b;
1447 
1448   CHKERRQ(PetscMemzero(A->ops,sizeof(struct _MatOps)));
1449 
1450   b->w    = NULL;
1451   CHKERRMPI(MPI_Comm_size(PetscObjectComm((PetscObject)A),&size));
1452   if (size == 1) {
1453     CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ));
1454     A->ops->destroy             = MatDestroy_SeqKAIJ;
1455     A->ops->mult                = MatMult_SeqKAIJ;
1456     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1457     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1458     A->ops->getrow              = MatGetRow_SeqKAIJ;
1459     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1460     A->ops->sor                 = MatSOR_SeqKAIJ;
1461     CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatConvert_seqkaij_seqaij_C",MatConvert_KAIJ_AIJ));
1462   } else {
1463     CHKERRQ(PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ));
1464     A->ops->destroy             = MatDestroy_MPIKAIJ;
1465     A->ops->mult                = MatMult_MPIKAIJ;
1466     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1467     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1468     A->ops->getrow              = MatGetRow_MPIKAIJ;
1469     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1470     CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ));
1471     CHKERRQ(PetscObjectComposeFunction((PetscObject)A,"MatConvert_mpikaij_mpiaij_C",MatConvert_KAIJ_AIJ));
1472   }
1473   A->ops->setup           = MatSetUp_KAIJ;
1474   A->ops->view            = MatView_KAIJ;
1475   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1476   PetscFunctionReturn(0);
1477 }
1478