xref: /petsc/src/mat/impls/kaij/kaij.c (revision e0e5a7938183be6b93764100cbef6621004a033e)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscErrorCode ierr;
145 
146   PetscFunctionBegin;
147   if (S) *S = NULL;
148   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
149   PetscFunctionReturn(0);
150 }
151 
152 /*@C
153   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
154 
155   Not collective
156 
157   Input Parameter:
158 . A - the KAIJ matrix
159 
160   Output Parameter:
161 . S - location of pointer to array obtained with MatKAIJGetS()
162 
163   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
164   If NULL is passed, it will not attempt to zero the array pointer.
165 
166   Level: advanced
167 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
168 @*/
169 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
170 {
171   PetscFunctionBegin;
172   if (S) *S = NULL;
173   PetscFunctionReturn(0);
174 }
175 
176 /*@C
177    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
178 
179    Not Collective; the entire T is stored and returned independently on all processes
180 
181    Input Parameter:
182 .  A - the KAIJ matrix
183 
184    Output Parameters:
185 +  m - the number of rows in T
186 .  n - the number of columns in T
187 -  T - the T matrix, in form of a scalar array in column-major format
188 
189    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
190 
191    Level: advanced
192 
193 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
194 @*/
195 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
196 {
197   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
198   PetscFunctionBegin;
199   if (m) *m = b->p;
200   if (n) *n = b->q;
201   if (T) *T = b->T;
202   PetscFunctionReturn(0);
203 }
204 
205 /*@C
206    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
207 
208    Not Collective; the entire T is stored and returned independently on all processes
209 
210    Input Parameter:
211 .  A - the KAIJ matrix
212 
213    Output Parameters:
214 +  m - the number of rows in T
215 .  n - the number of columns in T
216 -  T - the T matrix, in form of a scalar array in column-major format
217 
218    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
219 
220    Level: advanced
221 
222 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
223 @*/
224 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
225 {
226   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
227   PetscFunctionBegin;
228   if (m) *m = b->p;
229   if (n) *n = b->q;
230   if (T) *T = b->T;
231   PetscFunctionReturn(0);
232 }
233 
234 /*@C
235   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
236 
237   Not collective
238 
239   Input Parameter:
240 . A - the KAIJ matrix
241 
242   Output Parameter:
243 . T - location of pointer to array obtained with MatKAIJGetS()
244 
245   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
246   If NULL is passed, it will not attempt to zero the array pointer.
247 
248   Level: advanced
249 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
250 @*/
251 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
252 {
253   PetscErrorCode ierr;
254 
255   PetscFunctionBegin;
256   if (T) *T = NULL;
257   ierr = PetscObjectStateIncrease((PetscObject)A);CHKERRQ(ierr);
258   PetscFunctionReturn(0);
259 }
260 
261 /*@C
262   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
263 
264   Not collective
265 
266   Input Parameter:
267 . A - the KAIJ matrix
268 
269   Output Parameter:
270 . T - location of pointer to array obtained with MatKAIJGetS()
271 
272   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
273   If NULL is passed, it will not attempt to zero the array pointer.
274 
275   Level: advanced
276 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
277 @*/
278 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
279 {
280   PetscFunctionBegin;
281   if (T) *T = NULL;
282   PetscFunctionReturn(0);
283 }
284 
285 /*@
286    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
287 
288    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
289 
290    Input Parameters:
291 +  A - the KAIJ matrix
292 -  B - the AIJ matrix
293 
294    Notes:
295    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
296    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
297 
298    Level: advanced
299 
300 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
301 @*/
302 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
303 {
304   PetscErrorCode ierr;
305   PetscMPIInt    size;
306 
307   PetscFunctionBegin;
308   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
309   if (size == 1) {
310     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
311     a->AIJ = B;
312   } else {
313     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
314     a->A = B;
315   }
316   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
317   PetscFunctionReturn(0);
318 }
319 
320 /*@C
321    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
322 
323    Logically Collective; the entire S is stored independently on all processes.
324 
325    Input Parameters:
326 +  A - the KAIJ matrix
327 .  p - the number of rows in S
328 .  q - the number of columns in S
329 -  S - the S matrix, in form of a scalar array in column-major format
330 
331    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
332    The S matrix is copied, so the user can destroy this array.
333 
334    Level: Advanced
335 
336 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
337 @*/
338 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
339 {
340   PetscErrorCode ierr;
341   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
342 
343   PetscFunctionBegin;
344   ierr = PetscFree(a->S);CHKERRQ(ierr);
345   if (S) {
346     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
347     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
348   } else  a->S = NULL;
349 
350   a->p = p;
351   a->q = q;
352   PetscFunctionReturn(0);
353 }
354 
355 /*@C
356    MatKAIJGetScaledIdentity - Check if both S and T are scaled identities.
357 
358    Logically Collective.
359 
360    Input Parameter:
361 .  A - the KAIJ matrix
362 
363   Output Parameter:
364 .  identity - the Boolean value
365 
366    Level: Advanced
367 
368 .seealso: MatKAIJGetS(), MatKAIJGetT()
369 @*/
370 PetscErrorCode MatKAIJGetScaledIdentity(Mat A,PetscBool* identity)
371 {
372   Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
373   PetscInt    i,j;
374 
375   PetscFunctionBegin;
376   if (a->p != a->q) {
377     *identity = PETSC_FALSE;
378     PetscFunctionReturn(0);
379   } else *identity = PETSC_TRUE;
380   if (!a->isTI || a->S) {
381     for (i=0; i<a->p && *identity; i++) {
382       for (j=0; j<a->p && *identity; j++) {
383         if (i != j) {
384           if (a->S && PetscAbsScalar(a->S[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
385           if (a->T && PetscAbsScalar(a->T[i+j*a->p]) > PETSC_SMALL) *identity = PETSC_FALSE;
386         } else {
387           if (a->S && PetscAbsScalar(a->S[i*(a->p+1)]-a->S[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
388           if (a->T && PetscAbsScalar(a->T[i*(a->p+1)]-a->T[0]) > PETSC_SMALL) *identity = PETSC_FALSE;
389         }
390       }
391     }
392   }
393   PetscFunctionReturn(0);
394 }
395 
396 /*@C
397    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
398 
399    Logically Collective; the entire T is stored independently on all processes.
400 
401    Input Parameters:
402 +  A - the KAIJ matrix
403 .  p - the number of rows in S
404 .  q - the number of columns in S
405 -  T - the T matrix, in form of a scalar array in column-major format
406 
407    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
408    The T matrix is copied, so the user can destroy this array.
409 
410    Level: Advanced
411 
412 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
413 @*/
414 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
415 {
416   PetscErrorCode ierr;
417   PetscInt       i,j;
418   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
419   PetscBool      isTI = PETSC_FALSE;
420 
421   PetscFunctionBegin;
422   /* check if T is an identity matrix */
423   if (T && (p == q)) {
424     isTI = PETSC_TRUE;
425     for (i=0; i<p; i++) {
426       for (j=0; j<q; j++) {
427         if (i == j) {
428           /* diagonal term must be 1 */
429           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
430         } else {
431           /* off-diagonal term must be 0 */
432           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
433         }
434       }
435     }
436   }
437   a->isTI = isTI;
438 
439   ierr = PetscFree(a->T);CHKERRQ(ierr);
440   if (T && (!isTI)) {
441     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
442     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
443   } else a->T = NULL;
444 
445   a->p = p;
446   a->q = q;
447   PetscFunctionReturn(0);
448 }
449 
450 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
451 {
452   PetscErrorCode ierr;
453   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
454 
455   PetscFunctionBegin;
456   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
457   ierr = PetscFree(b->S);CHKERRQ(ierr);
458   ierr = PetscFree(b->T);CHKERRQ(ierr);
459   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
460   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
461   ierr = PetscFree(A->data);CHKERRQ(ierr);
462   PetscFunctionReturn(0);
463 }
464 
465 PETSC_INTERN PetscErrorCode MatKAIJ_build_AIJ_OAIJ(Mat A)
466 {
467   PetscErrorCode   ierr;
468   Mat_MPIKAIJ      *a;
469   Mat_MPIAIJ       *mpiaij;
470   PetscScalar      *T;
471   PetscInt         i,j;
472   PetscObjectState state;
473 
474   PetscFunctionBegin;
475   a = (Mat_MPIKAIJ*)A->data;
476   mpiaij = (Mat_MPIAIJ*)a->A->data;
477 
478   ierr = PetscObjectStateGet((PetscObject)a->A,&state);CHKERRQ(ierr);
479   if (state == a->state) {
480     /* The existing AIJ and KAIJ members are up-to-date, so simply exit. */
481     PetscFunctionReturn(0);
482   } else {
483     ierr = MatDestroy(&a->AIJ);CHKERRQ(ierr);
484     ierr = MatDestroy(&a->OAIJ);CHKERRQ(ierr);
485     if (a->isTI) {
486       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
487        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
488        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
489        * to pass in. */
490       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
491       for (i=0; i<a->p; i++) {
492         for (j=0; j<a->q; j++) {
493           if (i==j) T[i+j*a->p] = 1.0;
494           else      T[i+j*a->p] = 0.0;
495         }
496       }
497     } else T = a->T;
498     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
499     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
500     if (a->isTI) {
501       ierr = PetscFree(T);CHKERRQ(ierr);
502     }
503     a->state = state;
504   }
505 
506   PetscFunctionReturn(0);
507 }
508 
509 PetscErrorCode MatSetUp_KAIJ(Mat A)
510 {
511   PetscErrorCode ierr;
512   PetscInt       n;
513   PetscMPIInt    size;
514   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
515 
516   PetscFunctionBegin;
517   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
518   if (size == 1) {
519     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
520     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
521     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
522     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
523     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
524   } else {
525     Mat_MPIKAIJ *a;
526     Mat_MPIAIJ  *mpiaij;
527     IS          from,to;
528     Vec         gvec;
529     PetscScalar *T;
530     PetscInt    i,j;
531 
532     a = (Mat_MPIKAIJ*)A->data;
533     mpiaij = (Mat_MPIAIJ*)a->A->data;
534     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
535     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
536     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
537     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
538     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
539 
540     if (a->isTI) {
541       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
542        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
543        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
544        * to pass in. */
545       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
546       for (i=0; i<a->p; i++) {
547         for (j=0; j<a->q; j++) {
548           if (i==j) T[i+j*a->p] = 1.0;
549           else      T[i+j*a->p] = 0.0;
550         }
551       }
552     } else T = a->T;
553     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
554     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
555     if (a->isTI) {
556       ierr = PetscFree(T);CHKERRQ(ierr);
557     }
558 
559     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
560     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
561     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
562     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
563     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
564 
565     /* create two temporary Index sets for build scatter gather */
566     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
567     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
568 
569     /* create temporary global vector to generate scatter context */
570     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
571 
572     /* generate the scatter context */
573     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
574 
575     ierr = ISDestroy(&from);CHKERRQ(ierr);
576     ierr = ISDestroy(&to);CHKERRQ(ierr);
577     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
578   }
579 
580   A->assembled = PETSC_TRUE;
581   PetscFunctionReturn(0);
582 }
583 
584 PetscErrorCode MatView_KAIJ(Mat A,PetscViewer viewer)
585 {
586   PetscViewerFormat format;
587   Mat_SeqKAIJ       *a = (Mat_SeqKAIJ*)A->data;
588   Mat               B;
589   PetscInt          i;
590   PetscErrorCode    ierr;
591   PetscBool         ismpikaij;
592 
593   PetscFunctionBegin;
594   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
595   ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
596   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL || format == PETSC_VIEWER_ASCII_IMPL) {
597     ierr = PetscViewerASCIIPrintf(viewer,"S and T have %D rows and %D columns\n",a->p,a->q);CHKERRQ(ierr);
598 
599     /* Print appropriate details for S. */
600     if (!a->S) {
601       ierr = PetscViewerASCIIPrintf(viewer,"S is NULL\n");CHKERRQ(ierr);
602     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
603       ierr = PetscViewerASCIIPrintf(viewer,"Entries of S are ");CHKERRQ(ierr);
604       for (i=0; i<(a->p * a->q); i++) {
605 #if defined(PETSC_USE_COMPLEX)
606         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->S[i]),(double)PetscImaginaryPart(a->S[i]));CHKERRQ(ierr);
607 #else
608         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->S[i]));CHKERRQ(ierr);
609 #endif
610       }
611       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
612     }
613 
614     /* Print appropriate details for T. */
615     if (a->isTI) {
616       ierr = PetscViewerASCIIPrintf(viewer,"T is the identity matrix\n");CHKERRQ(ierr);
617     } else if (!a->T) {
618       ierr = PetscViewerASCIIPrintf(viewer,"T is NULL\n");CHKERRQ(ierr);
619     } else if (format == PETSC_VIEWER_ASCII_IMPL) {
620       ierr = PetscViewerASCIIPrintf(viewer,"Entries of T are ");CHKERRQ(ierr);
621       for (i=0; i<(a->p * a->q); i++) {
622 #if defined(PETSC_USE_COMPLEX)
623         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e %18.16e ",(double)PetscRealPart(a->T[i]),(double)PetscImaginaryPart(a->T[i]));CHKERRQ(ierr);
624 #else
625         ierr = PetscViewerASCIIPrintf(viewer,"%18.16e ",(double)PetscRealPart(a->T[i]));CHKERRQ(ierr);
626 #endif
627       }
628       ierr = PetscViewerASCIIPrintf(viewer,"\n");CHKERRQ(ierr);
629     }
630 
631     /* Now print details for the AIJ matrix, using the AIJ viewer. */
632     ierr = PetscViewerASCIIPrintf(viewer,"Now viewing the associated AIJ matrix:\n");CHKERRQ(ierr);
633     if (ismpikaij) {
634       Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
635       ierr = MatView(b->A,viewer);CHKERRQ(ierr);
636     } else {
637       ierr = MatView(a->AIJ,viewer);CHKERRQ(ierr);
638     }
639 
640   } else {
641     /* For all other matrix viewer output formats, simply convert to an AIJ matrix and call MatView() on that. */
642     if (ismpikaij) {
643       ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
644     } else {
645       ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
646     }
647     ierr = MatView(B,viewer);CHKERRQ(ierr);
648     ierr = MatDestroy(&B);CHKERRQ(ierr);
649   }
650   PetscFunctionReturn(0);
651 }
652 
653 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
654 {
655   PetscErrorCode ierr;
656   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
657 
658   PetscFunctionBegin;
659   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
660   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
661   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
662   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
663   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
664   ierr = PetscFree(b->S);CHKERRQ(ierr);
665   ierr = PetscFree(b->T);CHKERRQ(ierr);
666   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
667   ierr = PetscFree(A->data);CHKERRQ(ierr);
668   PetscFunctionReturn(0);
669 }
670 
671 /* --------------------------------------------------------------------------------------*/
672 
673 /* zz = yy + Axx */
674 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
675 {
676   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
677   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
678   const PetscScalar *s = b->S, *t = b->T;
679   const PetscScalar *x,*v,*bx;
680   PetscScalar       *y,*sums;
681   PetscErrorCode    ierr;
682   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
683   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
684 
685   PetscFunctionBegin;
686   if (!yy) {
687     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
688   } else {
689     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
690   }
691   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
692 
693   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
694   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
695   idx  = a->j;
696   v    = a->a;
697   ii   = a->i;
698 
699   if (b->isTI) {
700     for (i=0; i<m; i++) {
701       jrow = ii[i];
702       n    = ii[i+1] - jrow;
703       sums = y + p*i;
704       for (j=0; j<n; j++) {
705         for (k=0; k<p; k++) {
706           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
707         }
708       }
709     }
710     ierr = PetscLogFlops(3.0*(a->nz)*p);CHKERRQ(ierr);
711   } else if (t) {
712     for (i=0; i<m; i++) {
713       jrow = ii[i];
714       n    = ii[i+1] - jrow;
715       sums = y + p*i;
716       for (j=0; j<n; j++) {
717         for (k=0; k<p; k++) {
718           for (l=0; l<q; l++) {
719             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
720           }
721         }
722       }
723     }
724     /* The flop count below assumes that v[jrow+j] is hoisted out (which an optimizing compiler is likely to do),
725      * and also that T part is hoisted outside this loop (in exchange for temporary storage) as (A \otimes I) (I \otimes T),
726      * so that this multiply doesn't have to be redone for each matrix entry, but just once per column. The latter
727      * transformation is much less likely to be applied, but we nonetheless count the minimum flops required. */
728     ierr = PetscLogFlops((2.0*p*q-p)*m+2.0*p*a->nz);CHKERRQ(ierr);
729   }
730   if (s) {
731     for (i=0; i<m; i++) {
732       sums = y + p*i;
733       bx   = x + q*i;
734       if (i < b->AIJ->cmap->n) {
735         for (j=0; j<q; j++) {
736           for (k=0; k<p; k++) {
737             sums[k] += s[k+j*p]*bx[j];
738           }
739         }
740       }
741     }
742     ierr = PetscLogFlops(2.0*m*p*q);CHKERRQ(ierr);
743   }
744 
745   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
746   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
747   PetscFunctionReturn(0);
748 }
749 
750 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
751 {
752   PetscErrorCode ierr;
753   PetscFunctionBegin;
754   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
755   PetscFunctionReturn(0);
756 }
757 
758 #include <petsc/private/kernels/blockinvert.h>
759 
760 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
761 {
762   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
763   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
764   const PetscScalar *S  = b->S;
765   const PetscScalar *T  = b->T;
766   const PetscScalar *v  = a->a;
767   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
768   PetscErrorCode    ierr;
769   PetscInt          i,j,*v_pivots,dof,dof2;
770   PetscScalar       *diag,aval,*v_work;
771 
772   PetscFunctionBegin;
773   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
774   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
775 
776   dof  = p;
777   dof2 = dof*dof;
778 
779   if (b->ibdiagvalid) {
780     if (values) *values = b->ibdiag;
781     PetscFunctionReturn(0);
782   }
783   if (!b->ibdiag) {
784     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
785     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
786   }
787   if (values) *values = b->ibdiag;
788   diag = b->ibdiag;
789 
790   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
791   for (i=0; i<m; i++) {
792     if (S) {
793       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
794     } else {
795       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
796     }
797     if (b->isTI) {
798       aval = 0;
799       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
800       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
801     } else if (T) {
802       aval = 0;
803       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
804       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
805     }
806     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
807     diag += dof2;
808   }
809   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
810 
811   b->ibdiagvalid = PETSC_TRUE;
812   PetscFunctionReturn(0);
813 }
814 
815 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
816 {
817   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
818 
819   PetscFunctionBegin;
820   *B = kaij->AIJ;
821   PetscFunctionReturn(0);
822 }
823 
824 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
825 {
826   PetscErrorCode    ierr;
827   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
828   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
829   const PetscScalar *aa = a->a, *T = kaij->T, *v;
830   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
831   const PetscScalar *b, *xb, *idiag;
832   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
833   PetscInt          i, j, k, i2, bs, bs2, nz;
834 
835   PetscFunctionBegin;
836   its = its*lits;
837   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
838   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
839   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
840   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
841   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
842   else        {bs = p; bs2 = bs*bs; }
843 
844   if (!m) PetscFunctionReturn(0);
845 
846   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
847   idiag = kaij->ibdiag;
848   diag  = a->diag;
849 
850   if (!kaij->sor.setup) {
851     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
852     kaij->sor.setup = PETSC_TRUE;
853   }
854   y     = kaij->sor.y;
855   w     = kaij->sor.w;
856   work  = kaij->sor.work;
857   t     = kaij->sor.t;
858   arr   = kaij->sor.arr;
859 
860   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
861   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
862 
863   if (flag & SOR_ZERO_INITIAL_GUESS) {
864     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
865       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
866       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
867       i2     =  bs;
868       idiag  += bs2;
869       for (i=1; i<m; i++) {
870         v  = aa + ai[i];
871         vi = aj + ai[i];
872         nz = diag[i] - ai[i];
873 
874         if (T) {                /* b - T (Arow * x) */
875           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
876           for (j=0; j<nz; j++) {
877             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
878           }
879           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
880           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
881         } else if (kaij->isTI) {
882           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
883           for (j=0; j<nz; j++) {
884             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
885           }
886         } else {
887           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
888         }
889 
890         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
891         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
892 
893         idiag += bs2;
894         i2    += bs;
895       }
896       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
897       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
898       xb = t;
899     } else xb = b;
900     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
901       idiag = kaij->ibdiag+bs2*(m-1);
902       i2    = bs * (m-1);
903       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
904       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
905       i2    -= bs;
906       idiag -= bs2;
907       for (i=m-2; i>=0; i--) {
908         v  = aa + diag[i] + 1 ;
909         vi = aj + diag[i] + 1;
910         nz = ai[i+1] - diag[i] - 1;
911 
912         if (T) {                /* FIXME: This branch untested */
913           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
914           /* copy all rows of x that are needed into contiguous space */
915           workt = work;
916           for (j=0; j<nz; j++) {
917             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
918             workt += bs;
919           }
920           arrt = arr;
921           for (j=0; j<nz; j++) {
922             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
923             for (k=0; k<bs2; k++) arrt[k] *= v[j];
924             arrt += bs2;
925           }
926           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
927         } else if (kaij->isTI) {
928           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
929           for (j=0; j<nz; j++) {
930             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
931           }
932         }
933 
934         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
935         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
936 
937         idiag -= bs2;
938         i2    -= bs;
939       }
940       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
941     }
942     its--;
943   }
944   while (its--) {               /* FIXME: This branch not updated */
945     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
946       i2     =  0;
947       idiag  = kaij->ibdiag;
948       for (i=0; i<m; i++) {
949         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
950 
951         v  = aa + ai[i];
952         vi = aj + ai[i];
953         nz = diag[i] - ai[i];
954         workt = work;
955         for (j=0; j<nz; j++) {
956           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
957           workt += bs;
958         }
959         arrt = arr;
960         if (T) {
961           for (j=0; j<nz; j++) {
962             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
963             for (k=0; k<bs2; k++) arrt[k] *= v[j];
964             arrt += bs2;
965           }
966           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
967         } else if (kaij->isTI) {
968           for (j=0; j<nz; j++) {
969             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
970             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
971             arrt += bs2;
972           }
973           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
974         }
975         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
976 
977         v  = aa + diag[i] + 1;
978         vi = aj + diag[i] + 1;
979         nz = ai[i+1] - diag[i] - 1;
980         workt = work;
981         for (j=0; j<nz; j++) {
982           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
983           workt += bs;
984         }
985         arrt = arr;
986         if (T) {
987           for (j=0; j<nz; j++) {
988             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
989             for (k=0; k<bs2; k++) arrt[k] *= v[j];
990             arrt += bs2;
991           }
992           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
993         } else if (kaij->isTI) {
994           for (j=0; j<nz; j++) {
995             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
996             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
997             arrt += bs2;
998           }
999           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1000         }
1001 
1002         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1003         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1004 
1005         idiag += bs2;
1006         i2    += bs;
1007       }
1008       xb = t;
1009     }
1010     else xb = b;
1011     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
1012       idiag = kaij->ibdiag+bs2*(m-1);
1013       i2    = bs * (m-1);
1014       if (xb == b) {
1015         for (i=m-1; i>=0; i--) {
1016           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1017 
1018           v  = aa + ai[i];
1019           vi = aj + ai[i];
1020           nz = diag[i] - ai[i];
1021           workt = work;
1022           for (j=0; j<nz; j++) {
1023             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1024             workt += bs;
1025           }
1026           arrt = arr;
1027           if (T) {
1028             for (j=0; j<nz; j++) {
1029               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1030               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1031               arrt += bs2;
1032             }
1033             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1034           } else if (kaij->isTI) {
1035             for (j=0; j<nz; j++) {
1036               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1037               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1038               arrt += bs2;
1039             }
1040             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1041           }
1042 
1043           v  = aa + diag[i] + 1;
1044           vi = aj + diag[i] + 1;
1045           nz = ai[i+1] - diag[i] - 1;
1046           workt = work;
1047           for (j=0; j<nz; j++) {
1048             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1049             workt += bs;
1050           }
1051           arrt = arr;
1052           if (T) {
1053             for (j=0; j<nz; j++) {
1054               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1055               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1056               arrt += bs2;
1057             }
1058             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1059           } else if (kaij->isTI) {
1060             for (j=0; j<nz; j++) {
1061               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1062               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1063               arrt += bs2;
1064             }
1065             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1066           }
1067 
1068           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1069           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1070         }
1071       } else {
1072         for (i=m-1; i>=0; i--) {
1073           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
1074           v  = aa + diag[i] + 1;
1075           vi = aj + diag[i] + 1;
1076           nz = ai[i+1] - diag[i] - 1;
1077           workt = work;
1078           for (j=0; j<nz; j++) {
1079             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
1080             workt += bs;
1081           }
1082           arrt = arr;
1083           if (T) {
1084             for (j=0; j<nz; j++) {
1085               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1086               for (k=0; k<bs2; k++) arrt[k] *= v[j];
1087               arrt += bs2;
1088             }
1089             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1090           } else if (kaij->isTI) {
1091             for (j=0; j<nz; j++) {
1092               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
1093               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
1094               arrt += bs2;
1095             }
1096             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
1097           }
1098           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
1099           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
1100         }
1101       }
1102       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
1103     }
1104   }
1105 
1106   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
1107   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 /*===================================================================================*/
1112 
1113 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1114 {
1115   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
1116   PetscErrorCode ierr;
1117 
1118   PetscFunctionBegin;
1119   if (!yy) {
1120     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
1121   } else {
1122     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
1123   }
1124   /* start the scatter */
1125   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1126   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
1127   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1128   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
1133 {
1134   PetscErrorCode ierr;
1135   PetscFunctionBegin;
1136   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1141 {
1142   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1143   PetscErrorCode  ierr;
1144 
1145   PetscFunctionBegin;
1146   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1147   PetscFunctionReturn(0);
1148 }
1149 
1150 /* ----------------------------------------------------------------*/
1151 
1152 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1153 {
1154   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1155   PetscErrorCode  diag = PETSC_FALSE;
1156   PetscErrorCode  ierr;
1157   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1158   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1159 
1160   PetscFunctionBegin;
1161   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1162   b->getrowactive = PETSC_TRUE;
1163   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1164 
1165   if ((!S) && (!T) && (!b->isTI)) {
1166     if (ncols)    *ncols  = 0;
1167     if (cols)     *cols   = NULL;
1168     if (values)   *values = NULL;
1169     PetscFunctionReturn(0);
1170   }
1171 
1172   if (T || b->isTI) {
1173     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1174     c     = nzaij;
1175     for (i=0; i<nzaij; i++) {
1176       /* check if this row contains a diagonal entry */
1177       if (colsaij[i] == r) {
1178         diag = PETSC_TRUE;
1179         c = i;
1180       }
1181     }
1182   } else nzaij = c = 0;
1183 
1184   /* calculate size of row */
1185   nz = 0;
1186   if (S)            nz += q;
1187   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1188 
1189   if (cols || values) {
1190     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1191     for (i=0; i<q; i++) {
1192       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1193       v[i] = 0.0;
1194     }
1195     if (b->isTI) {
1196       for (i=0; i<nzaij; i++) {
1197         for (j=0; j<q; j++) {
1198           idx[i*q+j] = colsaij[i]*q+j;
1199           v[i*q+j]   = (j==s ? vaij[i] : 0);
1200         }
1201       }
1202     } else if (T) {
1203       for (i=0; i<nzaij; i++) {
1204         for (j=0; j<q; j++) {
1205           idx[i*q+j] = colsaij[i]*q+j;
1206           v[i*q+j]   = vaij[i]*T[s+j*p];
1207         }
1208       }
1209     }
1210     if (S) {
1211       for (j=0; j<q; j++) {
1212         idx[c*q+j] = r*q+j;
1213         v[c*q+j]  += S[s+j*p];
1214       }
1215     }
1216   }
1217 
1218   if (ncols)    *ncols  = nz;
1219   if (cols)     *cols   = idx;
1220   if (values)   *values = v;
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1225 {
1226   PetscErrorCode ierr;
1227 
1228   PetscFunctionBegin;
1229   if (nz) *nz = 0;
1230   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1231   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1236 {
1237   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1238   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1239   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1240   Mat             AIJ     = b->A;
1241   PetscBool       diag    = PETSC_FALSE;
1242   PetscErrorCode  ierr;
1243   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1244   PetscInt        nz,*idx,ncolsaij = 0,ncolsoaij = 0,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1245   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1246 
1247   PetscFunctionBegin;
1248   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1249   b->getrowactive = PETSC_TRUE;
1250   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1251   lrow = row - rstart;
1252 
1253   if ((!S) && (!T) && (!b->isTI)) {
1254     if (ncols)    *ncols  = 0;
1255     if (cols)     *cols   = NULL;
1256     if (values)   *values = NULL;
1257     PetscFunctionReturn(0);
1258   }
1259 
1260   r = lrow/p;
1261   s = lrow%p;
1262 
1263   if (T || b->isTI) {
1264     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);CHKERRQ(ierr);
1265     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1266     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1267 
1268     c     = ncolsaij + ncolsoaij;
1269     for (i=0; i<ncolsaij; i++) {
1270       /* check if this row contains a diagonal entry */
1271       if (colsaij[i] == r) {
1272         diag = PETSC_TRUE;
1273         c = i;
1274       }
1275     }
1276   } else c = 0;
1277 
1278   /* calculate size of row */
1279   nz = 0;
1280   if (S)            nz += q;
1281   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1282 
1283   if (cols || values) {
1284     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1285     for (i=0; i<q; i++) {
1286       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1287       v[i] = 0.0;
1288     }
1289     if (b->isTI) {
1290       for (i=0; i<ncolsaij; i++) {
1291         for (j=0; j<q; j++) {
1292           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1293           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1294         }
1295       }
1296       for (i=0; i<ncolsoaij; i++) {
1297         for (j=0; j<q; j++) {
1298           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1299           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1300         }
1301       }
1302     } else if (T) {
1303       for (i=0; i<ncolsaij; i++) {
1304         for (j=0; j<q; j++) {
1305           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1306           v[i*q+j]   = vals[i]*T[s+j*p];
1307         }
1308       }
1309       for (i=0; i<ncolsoaij; i++) {
1310         for (j=0; j<q; j++) {
1311           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1312           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1313         }
1314       }
1315     }
1316     if (S) {
1317       for (j=0; j<q; j++) {
1318         idx[c*q+j] = (r+rstart/p)*q+j;
1319         v[c*q+j]  += S[s+j*p];
1320       }
1321     }
1322   }
1323 
1324   if (ncols)  *ncols  = nz;
1325   if (cols)   *cols   = idx;
1326   if (values) *values = v;
1327   PetscFunctionReturn(0);
1328 }
1329 
1330 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1331 {
1332   PetscErrorCode ierr;
1333   PetscFunctionBegin;
1334   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1335   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1336   PetscFunctionReturn(0);
1337 }
1338 
1339 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1340 {
1341   PetscErrorCode ierr;
1342   Mat            A;
1343 
1344   PetscFunctionBegin;
1345   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1346   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1347   ierr = MatDestroy(&A);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 /* ---------------------------------------------------------------------------------- */
1352 /*@C
1353   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1354 
1355     [I \otimes S + A \otimes T]
1356 
1357   where
1358     S is a dense (p \times q) matrix
1359     T is a dense (p \times q) matrix
1360     A is an AIJ  (n \times n) matrix
1361     I is the identity matrix
1362   The resulting matrix is (np \times nq)
1363 
1364   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1365 
1366   Collective
1367 
1368   Input Parameters:
1369 + A - the AIJ matrix
1370 . p - number of rows in S and T
1371 . q - number of columns in S and T
1372 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1373 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1374 
1375   Output Parameter:
1376 . kaij - the new KAIJ matrix
1377 
1378   Notes:
1379   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1380   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1381 
1382   Level: advanced
1383 
1384 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1385 @*/
1386 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1387 {
1388   PetscErrorCode ierr;
1389   PetscMPIInt    size;
1390 
1391   PetscFunctionBegin;
1392   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1393   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1394   if (size == 1) {
1395     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1396   } else {
1397     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1398   }
1399   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1400   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1401   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1402   ierr = MatSetUp(*kaij);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 /*MC
1407   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of form
1408     [I \otimes S + A \otimes T],
1409   where
1410     S is a dense (p \times q) matrix,
1411     T is a dense (p \times q) matrix,
1412     A is an AIJ  (n \times n) matrix,
1413     and I is the identity matrix.
1414   The resulting matrix is (np \times nq).
1415 
1416   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1417 
1418   Notes:
1419   A linear system with multiple right-hand sides, AX = B, can be expressed in the KAIJ-friendly form of (A \otimes I) x = b,
1420   where x and b are column vectors containing the row-major representations of X and B.
1421 
1422   Level: advanced
1423 
1424 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1425 M*/
1426 
1427 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1428 {
1429   PetscErrorCode ierr;
1430   Mat_MPIKAIJ    *b;
1431   PetscMPIInt    size;
1432 
1433   PetscFunctionBegin;
1434   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1435   A->data  = (void*)b;
1436 
1437   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1438 
1439   A->ops->setup = MatSetUp_KAIJ;
1440 
1441   b->w    = NULL;
1442   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRMPI(ierr);
1443   if (size == 1) {
1444     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1445     A->ops->setup               = MatSetUp_KAIJ;
1446     A->ops->destroy             = MatDestroy_SeqKAIJ;
1447     A->ops->view                = MatView_KAIJ;
1448     A->ops->mult                = MatMult_SeqKAIJ;
1449     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1450     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1451     A->ops->getrow              = MatGetRow_SeqKAIJ;
1452     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1453     A->ops->sor                 = MatSOR_SeqKAIJ;
1454   } else {
1455     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1456     A->ops->setup               = MatSetUp_KAIJ;
1457     A->ops->destroy             = MatDestroy_MPIKAIJ;
1458     A->ops->view                = MatView_KAIJ;
1459     A->ops->mult                = MatMult_MPIKAIJ;
1460     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1461     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1462     A->ops->getrow              = MatGetRow_MPIKAIJ;
1463     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1464     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1465   }
1466   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1467   PetscFunctionReturn(0);
1468 }
1469