xref: /petsc/src/mat/impls/kaij/kaij.c (revision 2ae760e34befb2308177273f8b26d6303047df25)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscErrorCode ierr;
145 
146   PetscFunctionBegin;
147   if (S) *S = NULL;
148   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*@C
153   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
154 
155   Not collective
156 
157   Input Parameter:
158 . A - the KAIJ matrix
159 
160   Output Parameter:
161 . S - location of pointer to array obtained with MatKAIJGetS()
162 
163   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164   If NULL is passed, it will not attempt to zero the array pointer.
165 
166   Level: advanced
167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170 {
171   PetscFunctionBegin;
172   if (S) *S = NULL;
173   PetscFunctionReturn(0);
174 }
175 
176 /*@C
177    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
178 
179    Not Collective; the entire T is stored and returned independently on all processes
180 
181    Input Parameter:
182 .  A - the KAIJ matrix
183 
184    Output Parameter:
185 +  m - the number of rows in T
186 .  n - the number of columns in T
187 -  T - the T matrix, in form of a scalar array in column-major format
188 
189    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
190 
191    Level: advanced
192 
193 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196 {
197   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
198   PetscFunctionBegin;
199   if (m) *m = b->p;
200   if (n) *n = b->q;
201   if (T) *T = b->T;
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
207 
208    Not Collective; the entire T is stored and returned independently on all processes
209 
210    Input Parameter:
211 .  A - the KAIJ matrix
212 
213    Output Parameter:
214 +  m - the number of rows in T
215 .  n - the number of columns in T
216 -  T - the T matrix, in form of a scalar array in column-major format
217 
218    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
219 
220    Level: advanced
221 
222 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223 @*/
224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225 {
226   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
227   PetscFunctionBegin;
228   if (m) *m = b->p;
229   if (n) *n = b->q;
230   if (T) *T = b->T;
231   PetscFunctionReturn(0);
232 }
233 
234 /*@C
235   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
236 
237   Not collective
238 
239   Input Parameter:
240 . A - the KAIJ matrix
241 
242   Output Parameter:
243 . T - location of pointer to array obtained with MatKAIJGetS()
244 
245   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246   If NULL is passed, it will not attempt to zero the array pointer.
247 
248   Level: advanced
249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250 @*/
251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252 {
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   if (T) *T = NULL;
257   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 
261 /*@C
262   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
263 
264   Not collective
265 
266   Input Parameter:
267 . A - the KAIJ matrix
268 
269   Output Parameter:
270 . T - location of pointer to array obtained with MatKAIJGetS()
271 
272   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273   If NULL is passed, it will not attempt to zero the array pointer.
274 
275   Level: advanced
276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277 @*/
278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279 {
280   PetscFunctionBegin;
281   if (T) *T = NULL;
282   PetscFunctionReturn(0);
283 }
284 
285 /*@
286    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
287 
288    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
289 
290    Input Parameters:
291 +  A - the KAIJ matrix
292 -  B - the AIJ matrix
293 
294    Notes:
295    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
297 
298    Level: advanced
299 
300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301 @*/
302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303 {
304   PetscErrorCode ierr;
305   PetscMPIInt    size;
306 
307   PetscFunctionBegin;
308   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
309   if (size == 1) {
310     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311     a->AIJ = B;
312   } else {
313     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314     a->A = B;
315   }
316   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 /*@C
321    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
322 
323    Logically Collective; the entire S is stored independently on all processes.
324 
325    Input Parameters:
326 +  A - the KAIJ matrix
327 .  p - the number of rows in S
328 .  q - the number of columns in S
329 -  S - the S matrix, in form of a scalar array in column-major format
330 
331    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332    The S matrix is copied, so the user can destroy this array.
333 
334    Level: Advanced
335 
336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337 @*/
338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339 {
340   PetscErrorCode ierr;
341   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = PetscFree(a->S);CHKERRQ(ierr);
345   if (S) {
346     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
347     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
348   } else  a->S = NULL;
349 
350   a->p = p;
351   a->q = q;
352   PetscFunctionReturn(0);
353 }
354 
355 /*@C
356    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
357 
358    Logically Collective; the entire T is stored independently on all processes.
359 
360    Input Parameters:
361 +  A - the KAIJ matrix
362 .  p - the number of rows in S
363 .  q - the number of columns in S
364 -  T - the T matrix, in form of a scalar array in column-major format
365 
366    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
367    The T matrix is copied, so the user can destroy this array.
368 
369    Level: Advanced
370 
371 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
372 @*/
373 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
374 {
375   PetscErrorCode ierr;
376   PetscInt       i,j;
377   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
378   PetscBool      isTI = PETSC_FALSE;
379 
380   PetscFunctionBegin;
381   /* check if T is an identity matrix */
382   if (T && (p == q)) {
383     isTI = PETSC_TRUE;
384     for (i=0; i<p; i++) {
385       for (j=0; j<q; j++) {
386         if (i == j) {
387           /* diagonal term must be 1 */
388           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
389         } else {
390           /* off-diagonal term must be 0 */
391           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
392         }
393       }
394     }
395   }
396   a->isTI = isTI;
397 
398   ierr = PetscFree(a->T);CHKERRQ(ierr);
399   if (T && (!isTI)) {
400     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
401     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
402   } else a->T = NULL;
403 
404   a->p = p;
405   a->q = q;
406   PetscFunctionReturn(0);
407 }
408 
409 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
410 {
411   PetscErrorCode ierr;
412   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
413 
414   PetscFunctionBegin;
415   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
416   ierr = PetscFree(b->S);CHKERRQ(ierr);
417   ierr = PetscFree(b->T);CHKERRQ(ierr);
418   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
419   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
420   ierr = PetscFree(A->data);CHKERRQ(ierr);
421   PetscFunctionReturn(0);
422 }
423 
424 PetscErrorCode MatSetUp_KAIJ(Mat A)
425 {
426   PetscErrorCode ierr;
427   PetscInt       n;
428   PetscMPIInt    size;
429   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
430 
431   PetscFunctionBegin;
432   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
433   if (size == 1) {
434     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
435     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
436     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
437     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
438     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
439   } else {
440     Mat_MPIKAIJ *a;
441     Mat_MPIAIJ  *mpiaij;
442     IS          from,to;
443     Vec         gvec;
444     PetscScalar *T;
445     PetscInt    i,j;
446 
447     a = (Mat_MPIKAIJ*)A->data;
448     mpiaij = (Mat_MPIAIJ*)a->A->data;
449     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
450     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
451     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
452     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
453     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
454 
455     if (a->isTI) {
456       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
457        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
458        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
459        * to pass in. */
460       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
461       for (i=0; i<a->p; i++) {
462         for (j=0; j<a->q; j++) {
463           if (i==j) T[i+j*a->p] = 1.0;
464           else      T[i+j*a->p] = 0.0;
465         }
466       }
467     } else T = a->T;
468     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
469     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
470     if (a->isTI) {
471       ierr = PetscFree(T);CHKERRQ(ierr);
472     }
473 
474     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
475     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
476     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
477     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
478     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
479 
480     /* create two temporary Index sets for build scatter gather */
481     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
482     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
483 
484     /* create temporary global vector to generate scatter context */
485     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
486 
487     /* generate the scatter context */
488     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
489 
490     ierr = ISDestroy(&from);CHKERRQ(ierr);
491     ierr = ISDestroy(&to);CHKERRQ(ierr);
492     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
493   }
494 
495   A->assembled = PETSC_TRUE;
496   PetscFunctionReturn(0);
497 }
498 
499 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
500 {
501   PetscViewerFormat format;
502   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
503   Mat               B;
504   PetscInt          i;
505   PetscErrorCode    ierr;
506   PetscBool         ismpikaij;
507 
508   PetscFunctionBegin;
509   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
510   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
511   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
512     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
513 
514     /* Print appropriate details for S. */
515     if (!a->S) {
516       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr);
517     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
518       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
519       for (i=0; i<(a->p * a->q); i++) {
520 #if defined(PETSC_USE_COMPLEX)
521         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
522 #else
523         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
524 #endif
525       }
526       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
527     }
528 
529     /* Print appropriate details for T. */
530     if (a->isTI) {
531       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr);
532     } else if (!a->T) {
533       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr);
534     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
535       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
536       for (i=0; i<(a->p * a->q); i++) {
537 #if defined(PETSC_USE_COMPLEX)
538         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
539 #else
540         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
541 #endif
542       }
543       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
544     }
545 
546     /* Now print details for the AIJ matrix, using the AIJ viewer. */
547     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
548     if (ismpikaij) {
549       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
550       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
551     } else {
552       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
553     }
554 
555   } else {
556     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
557     if (ismpikaij) {
558       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
559     } else {
560       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
561     }
562     ierr = MatView(B,viewer);CHKERRQ(ierr);
563     ierr = MatDestroy(&B);CHKERRQ(ierr);
564   }
565   PetscFunctionReturn(0);
566 }
567 
568 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
569 {
570   PetscErrorCode ierr;
571   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
572 
573   PetscFunctionBegin;
574   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
575   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
576   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
577   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
578   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
579   ierr = PetscFree(b->S);CHKERRQ(ierr);
580   ierr = PetscFree(b->T);CHKERRQ(ierr);
581   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
582   ierr = PetscFree(A->data);CHKERRQ(ierr);
583   PetscFunctionReturn(0);
584 }
585 
586 /* --------------------------------------------------------------------------------------*/
587 
588 /* zz = yy + Axx */
589 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
590 {
591   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
592   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
593   const PetscScalar *s = b->S, *t = b->T;
594   const PetscScalar *x,*v,*bx;
595   PetscScalar       *y,*sums;
596   PetscErrorCode    ierr;
597   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
598   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
599 
600   PetscFunctionBegin;
601   if (!yy) {
602     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
603   } else {
604     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
605   }
606   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
607 
608   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
609   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
610   idx  = a->j;
611   v    = a->a;
612   ii   = a->i;
613 
614   if (b->isTI) {
615     for (i=0; i<m; i++) {
616       jrow = ii[i];
617       n    = ii[i+1] - jrow;
618       sums = y + p*i;
619       for (j=0; j<n; j++) {
620         for (k=0; k<p; k++) {
621           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
622         }
623       }
624     }
625     ierr = PetscLogFlops((a->nz)*3*p);CHKERRQ(ierr);
626   } else if (t) {
627     for (i=0; i<m; i++) {
628       jrow = ii[i];
629       n    = ii[i+1] - jrow;
630       sums = y + p*i;
631       bx   = x + q*i;
632       for (j=0; j<n; j++) {
633         for (k=0; k<p; k++) {
634           for (l=0; l<q; l++) {
635             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
636           }
637         }
638       }
639     }
640     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
641      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
642      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
643      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
644     ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
645   }
646   if (s) {
647     for (i=0; i<m; i++) {
648       sums = y + p*i;
649       bx   = x + q*i;
650       if (i < b->AIJ->cmap->n) {
651         for (j=0; j<q; j++) {
652           for (k=0; k<p; k++) {
653             sums[k] += s[k+j*p]*bx[j];
654           }
655         }
656       }
657     }
658     ierr = PetscLogFlops(m*2*p*q);CHKERRQ(ierr);
659   }
660 
661   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
662   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
663   PetscFunctionReturn(0);
664 }
665 
666 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
667 {
668   PetscErrorCode ierr;
669   PetscFunctionBegin;
670   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
671   PetscFunctionReturn(0);
672 }
673 
674 #include <petsc/private/kernels/blockinvert.h>
675 
676 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
677 {
678   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
679   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
680   const PetscScalar *S  = b->S;
681   const PetscScalar *T  = b->T;
682   const PetscScalar *v  = a->a;
683   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
684   PetscErrorCode    ierr;
685   PetscInt          i,j,*v_pivots,dof,dof2;
686   PetscScalar       *diag,aval,*v_work;
687 
688   PetscFunctionBegin;
689   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
690   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
691 
692   dof  = p;
693   dof2 = dof*dof;
694 
695   if (b->ibdiagvalid) {
696     if (values) *values = b->ibdiag;
697     PetscFunctionReturn(0);
698   }
699   if (!b->ibdiag) {
700     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
701     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
702   }
703   if (values) *values = b->ibdiag;
704   diag = b->ibdiag;
705 
706   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
707   for (i=0; i<m; i++) {
708     if (S) {
709       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
710     } else {
711       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
712     }
713     if (b->isTI) {
714       aval = 0;
715       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
716       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
717     } else if (T) {
718       aval = 0;
719       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
720       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
721     }
722     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
723     diag += dof2;
724   }
725   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
726 
727   b->ibdiagvalid = PETSC_TRUE;
728   PetscFunctionReturn(0);
729 }
730 
731 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
732 {
733   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
734 
735   PetscFunctionBegin;
736   *B = kaij->AIJ;
737   PetscFunctionReturn(0);
738 }
739 
740 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
741 {
742   PetscErrorCode    ierr;
743   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
744   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
745   const PetscScalar *aa = a->a, *T = kaij->T, *v;
746   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
747   const PetscScalar *b, *xb, *idiag;
748   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
749   PetscInt          i, j, k, i2, bs, bs2, nz;
750 
751   PetscFunctionBegin;
752   its = its*lits;
753   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
754   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
755   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
756   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
757   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
758   else        {bs = p; bs2 = bs*bs; }
759 
760   if (!m) PetscFunctionReturn(0);
761 
762   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
763   idiag = kaij->ibdiag;
764   diag  = a->diag;
765 
766   if (!kaij->sor.setup) {
767     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
768     kaij->sor.setup = PETSC_TRUE;
769   }
770   y     = kaij->sor.y;
771   w     = kaij->sor.w;
772   work  = kaij->sor.work;
773   t     = kaij->sor.t;
774   arr   = kaij->sor.arr;
775 
776   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
777   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
778 
779   if (flag & SOR_ZERO_INITIAL_GUESS) {
780     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
781       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
782       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
783       i2     =  bs;
784       idiag  += bs2;
785       for (i=1; i<m; i++) {
786         v  = aa + ai[i];
787         vi = aj + ai[i];
788         nz = diag[i] - ai[i];
789 
790         if (T) {                /* b - T (Arow * x) */
791           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
792           for (j=0; j<nz; j++) {
793             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
794           }
795           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
796           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
797         } else if (kaij->isTI) {
798           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
799           for (j=0; j<nz; j++) {
800             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
801           }
802         } else {
803           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
804         }
805 
806         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
807         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
808 
809         idiag += bs2;
810         i2    += bs;
811       }
812       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
813       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
814       xb = t;
815     } else xb = b;
816     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
817       idiag = kaij->ibdiag+bs2*(m-1);
818       i2    = bs * (m-1);
819       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
820       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
821       i2    -= bs;
822       idiag -= bs2;
823       for (i=m-2; i>=0; i--) {
824         v  = aa + diag[i] + 1 ;
825         vi = aj + diag[i] + 1;
826         nz = ai[i+1] - diag[i] - 1;
827 
828         if (T) {                /* FIXME: This branch untested */
829           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
830           /* copy all rows of x that are needed into contiguous space */
831           workt = work;
832           for (j=0; j<nz; j++) {
833             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
834             workt += bs;
835           }
836           arrt = arr;
837           for (j=0; j<nz; j++) {
838             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
839             for (k=0; k<bs2; k++) arrt[k] *= v[j];
840             arrt += bs2;
841           }
842           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
843         } else if (kaij->isTI) {
844           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
845           for (j=0; j<nz; j++) {
846             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
847           }
848         }
849 
850         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
851         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
852 
853         idiag -= bs2;
854         i2    -= bs;
855       }
856       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
857     }
858     its--;
859   }
860   while (its--) {               /* FIXME: This branch not updated */
861     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
862       i2     =  0;
863       idiag  = kaij->ibdiag;
864       for (i=0; i<m; i++) {
865         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
866 
867         v  = aa + ai[i];
868         vi = aj + ai[i];
869         nz = diag[i] - ai[i];
870         workt = work;
871         for (j=0; j<nz; j++) {
872           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
873           workt += bs;
874         }
875         arrt = arr;
876         if (T) {
877           for (j=0; j<nz; j++) {
878             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
879             for (k=0; k<bs2; k++) arrt[k] *= v[j];
880             arrt += bs2;
881           }
882           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
883         } else if (kaij->isTI) {
884           for (j=0; j<nz; j++) {
885             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
886             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
887             arrt += bs2;
888           }
889           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
890         }
891         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
892 
893         v  = aa + diag[i] + 1;
894         vi = aj + diag[i] + 1;
895         nz = ai[i+1] - diag[i] - 1;
896         workt = work;
897         for (j=0; j<nz; j++) {
898           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
899           workt += bs;
900         }
901         arrt = arr;
902         if (T) {
903           for (j=0; j<nz; j++) {
904             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
905             for (k=0; k<bs2; k++) arrt[k] *= v[j];
906             arrt += bs2;
907           }
908           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
909         } else if (kaij->isTI) {
910           for (j=0; j<nz; j++) {
911             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
912             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
913             arrt += bs2;
914           }
915           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
916         }
917 
918         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
919         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
920 
921         idiag += bs2;
922         i2    += bs;
923       }
924       xb = t;
925     }
926     else xb = b;
927     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
928       idiag = kaij->ibdiag+bs2*(m-1);
929       i2    = bs * (m-1);
930       if (xb == b) {
931         for (i=m-1; i>=0; i--) {
932           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
933 
934           v  = aa + ai[i];
935           vi = aj + ai[i];
936           nz = diag[i] - ai[i];
937           workt = work;
938           for (j=0; j<nz; j++) {
939             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
940             workt += bs;
941           }
942           arrt = arr;
943           if (T) {
944             for (j=0; j<nz; j++) {
945               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
946               for (k=0; k<bs2; k++) arrt[k] *= v[j];
947               arrt += bs2;
948             }
949             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
950           } else if (kaij->isTI) {
951             for (j=0; j<nz; j++) {
952               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
953               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
954               arrt += bs2;
955             }
956             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
957           }
958 
959           v  = aa + diag[i] + 1;
960           vi = aj + diag[i] + 1;
961           nz = ai[i+1] - diag[i] - 1;
962           workt = work;
963           for (j=0; j<nz; j++) {
964             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
965             workt += bs;
966           }
967           arrt = arr;
968           if (T) {
969             for (j=0; j<nz; j++) {
970               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
971               for (k=0; k<bs2; k++) arrt[k] *= v[j];
972               arrt += bs2;
973             }
974             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
975           } else if (kaij->isTI) {
976             for (j=0; j<nz; j++) {
977               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
978               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
979               arrt += bs2;
980             }
981             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
982           }
983 
984           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
985           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
986         }
987       } else {
988         for (i=m-1; i>=0; i--) {
989           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
990           v  = aa + diag[i] + 1;
991           vi = aj + diag[i] + 1;
992           nz = ai[i+1] - diag[i] - 1;
993           workt = work;
994           for (j=0; j<nz; j++) {
995             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
996             workt += bs;
997           }
998           arrt = arr;
999           if (T) {
1000             for (j=0; j<nz; j++) {
1001               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1002               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1003               arrt += bs2;
1004             }
1005             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1006           } else if (kaij->isTI) {
1007             for (j=0; j<nz; j++) {
1008               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1009               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1010               arrt += bs2;
1011             }
1012             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1013           }
1014           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1015           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1016         }
1017         idiag -= bs2;
1018         i2    -= bs;
1019       }
1020       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1021     }
1022   }
1023 
1024   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1025   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 /*===================================================================================*/
1030 
1031 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1032 {
1033   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1034   PetscErrorCode ierr;
1035 
1036   PetscFunctionBegin;
1037   if (!yy) {
1038     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
1039   } else {
1040     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1041   }
1042   /* start the scatter */
1043   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1044   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
1045   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1046   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1051 {
1052   PetscErrorCode ierr;
1053   PetscFunctionBegin;
1054   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1059 {
1060   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1061   PetscErrorCode  ierr;
1062 
1063   PetscFunctionBegin;
1064   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 /* ----------------------------------------------------------------*/
1069 
1070 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1071 {
1072   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1073   PetscErrorCode  diag = PETSC_FALSE;
1074   PetscErrorCode  ierr;
1075   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1076   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1077 
1078   PetscFunctionBegin;
1079   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1080   b->getrowactive = PETSC_TRUE;
1081   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1082 
1083   if ((!S) && (!T) && (!b->isTI)) {
1084     if (ncols)    *ncols  = 0;
1085     if (cols)     *cols   = NULL;
1086     if (values)   *values = NULL;
1087     PetscFunctionReturn(0);
1088   }
1089 
1090   if (T || b->isTI) {
1091     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1092     c     = nzaij;
1093     for (i=0; i<nzaij; i++) {
1094       /* check if this row contains a diagonal entry */
1095       if (colsaij[i] == r) {
1096         diag = PETSC_TRUE;
1097         c = i;
1098       }
1099     }
1100   } else nzaij = c = 0;
1101 
1102   /* calculate size of row */
1103   nz = 0;
1104   if (S)            nz += q;
1105   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1106 
1107   if (cols || values) {
1108     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1109     for (i=0; i<q; i++) {
1110       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1111       v[i] = 0.0;
1112     }
1113     if (b->isTI) {
1114       for (i=0; i<nzaij; i++) {
1115         for (j=0; j<q; j++) {
1116           idx[i*q+j] = colsaij[i]*q+j;
1117           v[i*q+j]   = (j==s ? vaij[i] : 0);
1118         }
1119       }
1120     } else if (T) {
1121       for (i=0; i<nzaij; i++) {
1122         for (j=0; j<q; j++) {
1123           idx[i*q+j] = colsaij[i]*q+j;
1124           v[i*q+j]   = vaij[i]*T[s+j*p];
1125         }
1126       }
1127     }
1128     if (S) {
1129       for (j=0; j<q; j++) {
1130         idx[c*q+j] = r*q+j;
1131         v[c*q+j]  += S[s+j*p];
1132       }
1133     }
1134   }
1135 
1136   if (ncols)    *ncols  = nz;
1137   if (cols)     *cols   = idx;
1138   if (values)   *values = v;
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1143 {
1144   PetscErrorCode ierr;
1145   PetscFunctionBegin;
1146   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1147   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1148   PetscFunctionReturn(0);
1149 }
1150 
1151 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1152 {
1153   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1154   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1155   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1156   Mat             AIJ     = b->A;
1157   PetscBool       diag    = PETSC_FALSE;
1158   PetscErrorCode  ierr;
1159   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1160   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1161   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1162 
1163   PetscFunctionBegin;
1164   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1165   b->getrowactive = PETSC_TRUE;
1166   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1167   lrow = row - rstart;
1168 
1169   if ((!S) && (!T) && (!b->isTI)) {
1170     if (ncols)    *ncols  = 0;
1171     if (cols)     *cols   = NULL;
1172     if (values)   *values = NULL;
1173     PetscFunctionReturn(0);
1174   }
1175 
1176   r = lrow/p;
1177   s = lrow%p;
1178 
1179   if (T || b->isTI) {
1180     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr);
1181     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1182     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1183 
1184     c     = ncolsaij + ncolsoaij;
1185     for (i=0; i<ncolsaij; i++) {
1186       /* check if this row contains a diagonal entry */
1187       if (colsaij[i] == r) {
1188         diag = PETSC_TRUE;
1189         c = i;
1190       }
1191     }
1192   } else c = 0;
1193 
1194   /* calculate size of row */
1195   nz = 0;
1196   if (S)            nz += q;
1197   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1198 
1199   if (cols || values) {
1200     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1201     for (i=0; i<q; i++) {
1202       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1203       v[i] = 0.0;
1204     }
1205     if (b->isTI) {
1206       for (i=0; i<ncolsaij; i++) {
1207         for (j=0; j<q; j++) {
1208           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1209           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1210         }
1211       }
1212       for (i=0; i<ncolsoaij; i++) {
1213         for (j=0; j<q; j++) {
1214           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1215           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1216         }
1217       }
1218     } else if (T) {
1219       for (i=0; i<ncolsaij; i++) {
1220         for (j=0; j<q; j++) {
1221           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1222           v[i*q+j]   = vals[i]*T[s+j*p];
1223         }
1224       }
1225       for (i=0; i<ncolsoaij; i++) {
1226         for (j=0; j<q; j++) {
1227           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1228           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1229         }
1230       }
1231     }
1232     if (S) {
1233       for (j=0; j<q; j++) {
1234         idx[c*q+j] = (r+rstart/p)*q+j;
1235         v[c*q+j]  += S[s+j*p];
1236       }
1237     }
1238   }
1239 
1240   if (ncols)  *ncols  = nz;
1241   if (cols)   *cols   = idx;
1242   if (values) *values = v;
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1247 {
1248   PetscErrorCode ierr;
1249   PetscFunctionBegin;
1250   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1251   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1256 {
1257   PetscErrorCode ierr;
1258   Mat            A;
1259 
1260   PetscFunctionBegin;
1261   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1262   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1263   ierr = MatDestroy(&A);CHKERRQ(ierr);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 /* ---------------------------------------------------------------------------------- */
1268 /*@C
1269   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1270 
1271     [I \otimes S + A \otimes T]
1272 
1273   where
1274     S is a dense (p \times q) matrix
1275     T is a dense (p \times q) matrix
1276     A is an AIJ  (n \times n) matrix
1277     I is the identity matrix
1278   The resulting matrix is (np \times nq)
1279 
1280   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1281 
1282   Collective
1283 
1284   Input Parameters:
1285 + A - the AIJ matrix
1286 . p - number of rows in S and T
1287 . q - number of columns in S and T
1288 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1289 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1290 
1291   Output Parameter:
1292 . kaij - the new KAIJ matrix
1293 
1294   Notes:
1295   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1296   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1297 
1298   Level: advanced
1299 
1300 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1301 @*/
1302 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1303 {
1304   PetscErrorCode ierr;
1305   PetscMPIInt    size;
1306 
1307   PetscFunctionBegin;
1308   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1309   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1310   if (size == 1) {
1311     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1312   } else {
1313     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1314   }
1315   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1316   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1317   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1318   ierr = MatSetUp(*kaij);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 /*MC
1323   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1324 
1325     [I \otimes S + A \otimes T]
1326 
1327   where
1328     S is a dense (p \times q) matrix
1329     T is a dense (p \times q) matrix
1330     A is an AIJ  (n \times n) matrix
1331     I is the identity matrix
1332   The resulting matrix is (np \times nq)
1333 
1334   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1335 
1336   Level: advanced
1337 
1338 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1339 M*/
1340 
1341 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1342 {
1343   PetscErrorCode ierr;
1344   Mat_MPIKAIJ    *b;
1345   PetscMPIInt    size;
1346 
1347   PetscFunctionBegin;
1348   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1349   A->data  = (void*)b;
1350 
1351   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1352 
1353   A->ops->setup = MatSetUp_KAIJ;
1354 
1355   b->w    = 0;
1356   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1357   if (size == 1) {
1358     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1359     A->ops->setup               = MatSetUp_KAIJ;
1360     A->ops->destroy             = MatDestroy_SeqKAIJ;
1361     A->ops->view                = MatView_KAIJ;
1362     A->ops->mult                = MatMult_SeqKAIJ;
1363     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1364     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1365     A->ops->getrow              = MatGetRow_SeqKAIJ;
1366     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1367     A->ops->sor                 = MatSOR_SeqKAIJ;
1368   } else {
1369     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1370     A->ops->setup               = MatSetUp_KAIJ;
1371     A->ops->destroy             = MatDestroy_MPIKAIJ;
1372     A->ops->view                = MatView_KAIJ;
1373     A->ops->mult                = MatMult_MPIKAIJ;
1374     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1375     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1376     A->ops->getrow              = MatGetRow_MPIKAIJ;
1377     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1378     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1379   }
1380   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1381   PetscFunctionReturn(0);
1382 }
1383