xref: /petsc/src/mat/impls/kaij/kaij.c (revision 15b9d0259c2be3cdae1589459f3c0981598a13e3)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscFunctionBegin;
145   if (S) *S = NULL;
146   PetscObjectStateIncrease((PetscObject)A);
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
152 
153   Not collective
154 
155   Input Parameter:
156 . A - the KAIJ matrix
157 
158   Output Parameter:
159 . S - location of pointer to array obtained with MatKAIJGetS()
160 
161   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
162   If NULL is passed, it will not attempt to zero the array pointer.
163 
164   Level: advanced
165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
166 @*/
167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
168 {
169   PetscFunctionBegin;
170   if (S) *S = NULL;
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
176 
177    Not Collective; the entire T is stored and returned independently on all processes
178 
179    Input Parameter:
180 .  A - the KAIJ matrix
181 
182    Output Parameter:
183 +  m - the number of rows in T
184 .  n - the number of columns in T
185 -  T - the T matrix, in form of a scalar array in column-major format
186 
187    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
188 
189    Level: advanced
190 
191 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
192 @*/
193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
194 {
195   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
196   PetscFunctionBegin;
197   if (m) *m = b->p;
198   if (n) *n = b->q;
199   if (T) *T = b->T;
200   PetscFunctionReturn(0);
201 }
202 
203 /*@C
204    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
205 
206    Not Collective; the entire T is stored and returned independently on all processes
207 
208    Input Parameter:
209 .  A - the KAIJ matrix
210 
211    Output Parameter:
212 +  m - the number of rows in T
213 .  n - the number of columns in T
214 -  T - the T matrix, in form of a scalar array in column-major format
215 
216    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
217 
218    Level: advanced
219 
220 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
221 @*/
222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
223 {
224   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
225   PetscFunctionBegin;
226   if (m) *m = b->p;
227   if (n) *n = b->q;
228   if (T) *T = b->T;
229   PetscFunctionReturn(0);
230 }
231 
232 /*@C
233   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
234 
235   Not collective
236 
237   Input Parameter:
238 . A - the KAIJ matrix
239 
240   Output Parameter:
241 . T - location of pointer to array obtained with MatKAIJGetS()
242 
243   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
244   If NULL is passed, it will not attempt to zero the array pointer.
245 
246   Level: advanced
247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
248 @*/
249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
250 {
251   PetscFunctionBegin;
252   if (T) *T = NULL;
253   PetscObjectStateIncrease((PetscObject)A);
254   PetscFunctionReturn(0);
255 }
256 
257 /*@C
258   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
259 
260   Not collective
261 
262   Input Parameter:
263 . A - the KAIJ matrix
264 
265   Output Parameter:
266 . T - location of pointer to array obtained with MatKAIJGetS()
267 
268   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
269   If NULL is passed, it will not attempt to zero the array pointer.
270 
271   Level: advanced
272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
273 @*/
274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
275 {
276   PetscFunctionBegin;
277   if (T) *T = NULL;
278   PetscFunctionReturn(0);
279 }
280 
281 /*@
282    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
283 
284    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
285 
286    Input Parameters:
287 +  A - the KAIJ matrix
288 -  B - the AIJ matrix
289 
290    Notes:
291    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
292    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
293 
294    Level: advanced
295 
296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
297 @*/
298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
299 {
300   PetscErrorCode ierr;
301   PetscMPIInt    size;
302 
303   PetscFunctionBegin;
304   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
305   if (size == 1) {
306     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
307     a->AIJ = B;
308   } else {
309     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
310     a->A = B;
311   }
312   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 /*@C
317    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
318 
319    Logically Collective; the entire S is stored independently on all processes.
320 
321    Input Parameters:
322 +  A - the KAIJ matrix
323 .  p - the number of rows in S
324 .  q - the number of columns in S
325 -  S - the S matrix, in form of a scalar array in column-major format
326 
327    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
328 
329    Level: Advanced
330 
331 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
332 @*/
333 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
334 {
335   PetscErrorCode ierr;
336   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
337 
338   PetscFunctionBegin;
339   ierr = PetscFree(a->S);CHKERRQ(ierr);
340   if (S) {
341     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
342     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
343   } else  a->S = NULL;
344 
345   a->p = p;
346   a->q = q;
347   PetscFunctionReturn(0);
348 }
349 
350 /*@C
351    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
352 
353    Logically Collective; the entire T is stored independently on all processes.
354 
355    Input Parameters:
356 +  A - the KAIJ matrix
357 .  p - the number of rows in S
358 .  q - the number of columns in S
359 -  T - the T matrix, in form of a scalar array in column-major format
360 
361    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
362 
363    Level: Advanced
364 
365 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
366 @*/
367 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
368 {
369   PetscErrorCode ierr;
370   PetscInt       i,j;
371   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
372   PetscBool      isTI = PETSC_FALSE;
373 
374   PetscFunctionBegin;
375   /* check if T is an identity matrix */
376   if (T && (p == q)) {
377     isTI = PETSC_TRUE;
378     for (i=0; i<p; i++) {
379       for (j=0; j<q; j++) {
380         if (i == j) {
381           /* diagonal term must be 1 */
382           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
383         } else {
384           /* off-diagonal term must be 0 */
385           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
386         }
387       }
388     }
389   }
390   a->isTI = isTI;
391 
392   ierr = PetscFree(a->T);CHKERRQ(ierr);
393   if (T && (!isTI)) {
394     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
395     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
396   } else a->T = NULL;
397 
398   a->p = p;
399   a->q = q;
400   PetscFunctionReturn(0);
401 }
402 
403 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
404 {
405   PetscErrorCode ierr;
406   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
407 
408   PetscFunctionBegin;
409   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
410   ierr = PetscFree(b->S);CHKERRQ(ierr);
411   ierr = PetscFree(b->T);CHKERRQ(ierr);
412   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
413   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
414   ierr = PetscFree(A->data);CHKERRQ(ierr);
415   PetscFunctionReturn(0);
416 }
417 
418 PetscErrorCode MatSetUp_KAIJ(Mat A)
419 {
420   PetscErrorCode ierr;
421   PetscInt       n;
422   PetscMPIInt    size;
423   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
424 
425   PetscFunctionBegin;
426   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
427   if (size == 1) {
428     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
429     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
430     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
431     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
432     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
433   } else {
434     Mat_MPIKAIJ *a;
435     Mat_MPIAIJ  *mpiaij;
436     IS          from,to;
437     Vec         gvec;
438     PetscScalar *T;
439     PetscInt    i,j;
440 
441     a = (Mat_MPIKAIJ*)A->data;
442     mpiaij = (Mat_MPIAIJ*)a->A->data;
443     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
444     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
445     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
446     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
447     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
448 
449     if (a->isTI) {
450       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
451        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
452        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
453        * to pass in. */
454       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
455       for (i=0; i<a->p; i++) {
456         for (j=0; j<a->q; j++) {
457           if (i==j) T[i+j*a->p] = 1.0;
458           else      T[i+j*a->p] = 0.0;
459         }
460       }
461     } else T = a->T;
462     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
463     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
464     ierr = PetscFree(T);CHKERRQ(ierr);
465 
466     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
467     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
468     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
469     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
470     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
471 
472     /* create two temporary Index sets for build scatter gather */
473     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
474     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
475 
476     /* create temporary global vector to generate scatter context */
477     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
478 
479     /* generate the scatter context */
480     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
481 
482     ierr = ISDestroy(&from);CHKERRQ(ierr);
483     ierr = ISDestroy(&to);CHKERRQ(ierr);
484     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
485   }
486 
487   A->assembled = PETSC_TRUE;
488   PetscFunctionReturn(0);
489 }
490 
491 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
492 {
493   PetscErrorCode ierr;
494   Mat            B;
495 
496   PetscFunctionBegin;
497   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
498   ierr = MatView(B,viewer);CHKERRQ(ierr);
499   ierr = MatDestroy(&B);CHKERRQ(ierr);
500   PetscFunctionReturn(0);
501 }
502 
503 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
504 {
505   PetscErrorCode ierr;
506   Mat            B;
507 
508   PetscFunctionBegin;
509   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
510   ierr = MatView(B,viewer);CHKERRQ(ierr);
511   ierr = MatDestroy(&B);CHKERRQ(ierr);
512   PetscFunctionReturn(0);
513 }
514 
515 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
516 {
517   PetscErrorCode ierr;
518   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
519 
520   PetscFunctionBegin;
521   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
522   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
523   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
524   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
525   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
526   ierr = PetscFree(b->S);CHKERRQ(ierr);
527   ierr = PetscFree(b->T);CHKERRQ(ierr);
528   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
529   ierr = PetscFree(A->data);CHKERRQ(ierr);
530   PetscFunctionReturn(0);
531 }
532 
533 /* --------------------------------------------------------------------------------------*/
534 
535 /* zz = yy + Axx */
536 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
537 {
538   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
539   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
540   const PetscScalar *s = b->S, *t = b->T;
541   const PetscScalar *x,*v,*bx;
542   PetscScalar       *y,*sums;
543   PetscErrorCode    ierr;
544   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
545   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
546 
547   PetscFunctionBegin;
548   if (!yy) {
549     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
550   } else {
551     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
552   }
553   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
554 
555   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
556   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
557   idx  = a->j;
558   v    = a->a;
559   ii   = a->i;
560 
561   if (b->isTI) {
562     for (i=0; i<m; i++) {
563       jrow = ii[i];
564       n    = ii[i+1] - jrow;
565       sums = y + p*i;
566       for (j=0; j<n; j++) {
567         for (k=0; k<p; k++) {
568           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
569         }
570       }
571     }
572   } else if (t) {
573     for (i=0; i<m; i++) {
574       jrow = ii[i];
575       n    = ii[i+1] - jrow;
576       sums = y + p*i;
577       bx   = x + q*i;
578       for (j=0; j<n; j++) {
579         for (k=0; k<p; k++) {
580           for (l=0; l<q; l++) {
581             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
582           }
583         }
584       }
585     }
586   }
587   if (s) {
588     for (i=0; i<m; i++) {
589       jrow = ii[i];
590       n    = ii[i+1] - jrow;
591       sums = y + p*i;
592       bx   = x + q*i;
593       if (i < b->AIJ->cmap->n) {
594         for (j=0; j<q; j++) {
595           for (k=0; k<p; k++) {
596             sums[k] += s[k+j*p]*bx[j];
597           }
598         }
599       }
600     }
601   }
602 
603   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
604   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
605   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
606   PetscFunctionReturn(0);
607 }
608 
609 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
610 {
611   PetscErrorCode ierr;
612   PetscFunctionBegin;
613   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
616 
617 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
618 {
619   PetscErrorCode ierr;
620   PetscFunctionBegin;
621   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 #include <petsc/private/kernels/blockinvert.h>
626 
627 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
628 {
629   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
630   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
631   const PetscScalar *S  = b->S;
632   const PetscScalar *T  = b->T;
633   const PetscScalar *v  = a->a;
634   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
635   PetscErrorCode    ierr;
636   PetscInt          i,j,*v_pivots,dof,dof2;
637   PetscScalar       *diag,aval,*v_work;
638 
639   PetscFunctionBegin;
640   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
641   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
642 
643   dof  = p;
644   dof2 = dof*dof;
645 
646   if (b->ibdiagvalid) {
647     if (values) *values = b->ibdiag;
648     PetscFunctionReturn(0);
649   }
650   if (!b->ibdiag) {
651     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
652     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
653   }
654   if (values) *values = b->ibdiag;
655   diag = b->ibdiag;
656 
657   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
658   for (i=0; i<m; i++) {
659     if (S) {
660       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
661     } else {
662       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
663     }
664     if (b->isTI) {
665       aval = 0;
666       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
667       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
668     } else if (T) {
669       aval = 0;
670       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
671       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
672     }
673     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
674     diag += dof2;
675   }
676   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
677 
678   b->ibdiagvalid = PETSC_TRUE;
679   PetscFunctionReturn(0);
680 }
681 
682 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
683 {
684   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
685 
686   PetscFunctionBegin;
687   *B = kaij->AIJ;
688   PetscFunctionReturn(0);
689 }
690 
691 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
692 {
693   PetscErrorCode    ierr;
694   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
695   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
696   const PetscScalar *aa = a->a, *T = kaij->T, *v;
697   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
698   const PetscScalar *b, *xb, *idiag;
699   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
700   PetscInt          i, j, k, i2, bs, bs2, nz;
701 
702   PetscFunctionBegin;
703   its = its*lits;
704   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
705   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
706   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
707   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
708   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
709   else        {bs = p; bs2 = bs*bs; }
710 
711   if (!m) PetscFunctionReturn(0);
712 
713   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
714   idiag = kaij->ibdiag;
715   diag  = a->diag;
716 
717   if (!kaij->sor.setup) {
718     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
719     kaij->sor.setup = PETSC_TRUE;
720   }
721   y     = kaij->sor.y;
722   w     = kaij->sor.w;
723   work  = kaij->sor.work;
724   t     = kaij->sor.t;
725   arr   = kaij->sor.arr;
726 
727   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
728   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
729 
730   if (flag & SOR_ZERO_INITIAL_GUESS) {
731     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
732       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
733       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
734       i2     =  bs;
735       idiag  += bs2;
736       for (i=1; i<m; i++) {
737         v  = aa + ai[i];
738         vi = aj + ai[i];
739         nz = diag[i] - ai[i];
740 
741         if (T) {                /* b - T (Arow * x) */
742           for (k=0; k<bs; k++) w[k] = 0;
743           for (j=0; j<nz; j++) {
744             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
745           }
746           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
747           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
748         } else if (kaij->isTI) {
749           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
750           for (j=0; j<nz; j++) {
751             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
752           }
753         } else {
754           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
755         }
756 
757         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
758         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
759 
760         idiag += bs2;
761         i2    += bs;
762       }
763       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
764       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
765       xb = t;
766     } else xb = b;
767     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
768       idiag = kaij->ibdiag+bs2*(m-1);
769       i2    = bs * (m-1);
770       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
771       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
772       i2    -= bs;
773       idiag -= bs2;
774       for (i=m-2; i>=0; i--) {
775         v  = aa + diag[i] + 1 ;
776         vi = aj + diag[i] + 1;
777         nz = ai[i+1] - diag[i] - 1;
778 
779         if (T) {                /* FIXME: This branch untested */
780           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
781           /* copy all rows of x that are needed into contiguous space */
782           workt = work;
783           for (j=0; j<nz; j++) {
784             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
785             workt += bs;
786           }
787           arrt = arr;
788           for (j=0; j<nz; j++) {
789             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
790             for (k=0; k<bs2; k++) arrt[k] *= v[j];
791             arrt += bs2;
792           }
793           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
794         } else if (kaij->isTI) {
795           for (k=0; k<bs; k++) w[k] = t[i2+k];
796           for (j=0; j<nz; j++) {
797             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
798           }
799         }
800 
801         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
802         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
803 
804         idiag -= bs2;
805         i2    -= bs;
806       }
807       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
808     }
809     its--;
810   }
811   while (its--) {               /* FIXME: This branch not updated */
812     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
813       i2     =  0;
814       idiag  = kaij->ibdiag;
815       for (i=0; i<m; i++) {
816         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
817 
818         v  = aa + ai[i];
819         vi = aj + ai[i];
820         nz = diag[i] - ai[i];
821         workt = work;
822         for (j=0; j<nz; j++) {
823           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
824           workt += bs;
825         }
826         arrt = arr;
827         if (T) {
828           for (j=0; j<nz; j++) {
829             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
830             for (k=0; k<bs2; k++) arrt[k] *= v[j];
831             arrt += bs2;
832           }
833           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
834         } else if (kaij->isTI) {
835           for (j=0; j<nz; j++) {
836             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
837             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
838             arrt += bs2;
839           }
840           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
841         }
842         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
843 
844         v  = aa + diag[i] + 1;
845         vi = aj + diag[i] + 1;
846         nz = ai[i+1] - diag[i] - 1;
847         workt = work;
848         for (j=0; j<nz; j++) {
849           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
850           workt += bs;
851         }
852         arrt = arr;
853         if (T) {
854           for (j=0; j<nz; j++) {
855             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
856             for (k=0; k<bs2; k++) arrt[k] *= v[j];
857             arrt += bs2;
858           }
859           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
860         } else if (kaij->isTI) {
861           for (j=0; j<nz; j++) {
862             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
863             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
864             arrt += bs2;
865           }
866           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
867         }
868 
869         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
870         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
871 
872         idiag += bs2;
873         i2    += bs;
874       }
875       xb = t;
876     }
877     else xb = b;
878     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
879       idiag = kaij->ibdiag+bs2*(m-1);
880       i2    = bs * (m-1);
881       if (xb == b) {
882         for (i=m-1; i>=0; i--) {
883           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
884 
885           v  = aa + ai[i];
886           vi = aj + ai[i];
887           nz = diag[i] - ai[i];
888           workt = work;
889           for (j=0; j<nz; j++) {
890             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
891             workt += bs;
892           }
893           arrt = arr;
894           if (T) {
895             for (j=0; j<nz; j++) {
896               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
897               for (k=0; k<bs2; k++) arrt[k] *= v[j];
898               arrt += bs2;
899             }
900             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
901           } else if (kaij->isTI) {
902             for (j=0; j<nz; j++) {
903               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
904               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
905               arrt += bs2;
906             }
907             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
908           }
909 
910           v  = aa + diag[i] + 1;
911           vi = aj + diag[i] + 1;
912           nz = ai[i+1] - diag[i] - 1;
913           workt = work;
914           for (j=0; j<nz; j++) {
915             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
916             workt += bs;
917           }
918           arrt = arr;
919           if (T) {
920             for (j=0; j<nz; j++) {
921               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
922               for (k=0; k<bs2; k++) arrt[k] *= v[j];
923               arrt += bs2;
924             }
925             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
926           } else if (kaij->isTI) {
927             for (j=0; j<nz; j++) {
928               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
929               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
930               arrt += bs2;
931             }
932             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
933           }
934 
935           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
936           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
937         }
938       } else {
939         for (i=m-1; i>=0; i--) {
940           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
941           v  = aa + diag[i] + 1;
942           vi = aj + diag[i] + 1;
943           nz = ai[i+1] - diag[i] - 1;
944           workt = work;
945           for (j=0; j<nz; j++) {
946             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
947             workt += bs;
948           }
949           arrt = arr;
950           if (T) {
951             for (j=0; j<nz; j++) {
952               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
953               for (k=0; k<bs2; k++) arrt[k] *= v[j];
954               arrt += bs2;
955             }
956             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
957           } else if (kaij->isTI) {
958             for (j=0; j<nz; j++) {
959               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
960               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
961               arrt += bs2;
962             }
963             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
964           }
965           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
966           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
967         }
968         idiag -= bs2;
969         i2    -= bs;
970       }
971       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
972     }
973   }
974 
975   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
976   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 /*===================================================================================*/
981 
982 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
983 {
984   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   if (!yy) {
989     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
990   } else {
991     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
992   }
993   /* start the scatter */
994   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
995   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
996   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
1002 {
1003   PetscErrorCode ierr;
1004   PetscFunctionBegin;
1005   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
1010 {
1011   PetscErrorCode ierr;
1012   PetscFunctionBegin;
1013   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
1018 {
1019   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1020   PetscErrorCode  ierr;
1021 
1022   PetscFunctionBegin;
1023   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 /* ----------------------------------------------------------------*/
1028 
1029 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1030 {
1031   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1032   PetscErrorCode  diag = PETSC_FALSE;
1033   PetscErrorCode  ierr;
1034   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1035   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1036 
1037   PetscFunctionBegin;
1038   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1039   b->getrowactive = PETSC_TRUE;
1040   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1041 
1042   if ((!S) && (!T) && (!b->isTI)) {
1043     if (ncols)    *ncols  = 0;
1044     if (cols)     *cols   = NULL;
1045     if (values)   *values = NULL;
1046     PetscFunctionReturn(0);
1047   }
1048 
1049   if (T || b->isTI) {
1050     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1051     c     = nzaij;
1052     for (i=0; i<nzaij; i++) {
1053       /* check if this row contains a diagonal entry */
1054       if (colsaij[i] == r) {
1055         diag = PETSC_TRUE;
1056         c = i;
1057       }
1058     }
1059   } else nzaij = c = 0;
1060 
1061   /* calculate size of row */
1062   nz = 0;
1063   if (S)            nz += q;
1064   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1065 
1066   if (cols || values) {
1067     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1068     for (i=0; i<q; i++) {
1069       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1070       v[i] = 0.0;
1071     }
1072     if (b->isTI) {
1073       for (i=0; i<nzaij; i++) {
1074         for (j=0; j<q; j++) {
1075           idx[i*q+j] = colsaij[i]*q+j;
1076           v[i*q+j]   = (j==s ? vaij[i] : 0);
1077         }
1078       }
1079     } else if (T) {
1080       for (i=0; i<nzaij; i++) {
1081         for (j=0; j<q; j++) {
1082           idx[i*q+j] = colsaij[i]*q+j;
1083           v[i*q+j]   = vaij[i]*T[s+j*p];
1084         }
1085       }
1086     }
1087     if (S) {
1088       for (j=0; j<q; j++) {
1089         idx[c*q+j] = r*q+j;
1090         v[c*q+j]  += S[s+j*p];
1091       }
1092     }
1093   }
1094 
1095   if (ncols)    *ncols  = nz;
1096   if (cols)     *cols   = idx;
1097   if (values)   *values = v;
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1102 {
1103   PetscErrorCode ierr;
1104   PetscFunctionBegin;
1105   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1106   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1111 {
1112   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1113   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1114   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1115   Mat             AIJ     = b->A;
1116   PetscBool       diag    = PETSC_FALSE;
1117   PetscErrorCode  ierr;
1118   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1119   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1120   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1121 
1122   PetscFunctionBegin;
1123   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1124   b->getrowactive = PETSC_TRUE;
1125   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1126   lrow = row - rstart;
1127 
1128   if ((!S) && (!T) && (!b->isTI)) {
1129     if (ncols)    *ncols  = 0;
1130     if (cols)     *cols   = NULL;
1131     if (values)   *values = NULL;
1132     PetscFunctionReturn(0);
1133   }
1134 
1135   r = lrow/p;
1136   s = lrow%p;
1137 
1138   if (T || b->isTI) {
1139     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1140     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1141     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1142 
1143     c     = ncolsaij + ncolsoaij;
1144     for (i=0; i<ncolsaij; i++) {
1145       /* check if this row contains a diagonal entry */
1146       if (colsaij[i] == r) {
1147         diag = PETSC_TRUE;
1148         c = i;
1149       }
1150     }
1151   } else c = 0;
1152 
1153   /* calculate size of row */
1154   nz = 0;
1155   if (S)            nz += q;
1156   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1157 
1158   if (cols || values) {
1159     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1160     for (i=0; i<q; i++) {
1161       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1162       v[i] = 0.0;
1163     }
1164     if (b->isTI) {
1165       for (i=0; i<ncolsaij; i++) {
1166         for (j=0; j<q; j++) {
1167           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1168           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1169         }
1170       }
1171       for (i=0; i<ncolsoaij; i++) {
1172         for (j=0; j<q; j++) {
1173           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1174           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1175         }
1176       }
1177     } else if (T) {
1178       for (i=0; i<ncolsaij; i++) {
1179         for (j=0; j<q; j++) {
1180           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1181           v[i*q+j]   = vals[i]*T[s+j*p];
1182         }
1183       }
1184       for (i=0; i<ncolsoaij; i++) {
1185         for (j=0; j<q; j++) {
1186           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1187           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1188         }
1189       }
1190     }
1191     if (S) {
1192       for (j=0; j<q; j++) {
1193         idx[c*q+j] = (r+rstart/p)*q+j;
1194         v[c*q+j]  += S[s+j*p];
1195       }
1196     }
1197   }
1198 
1199   if (ncols)  *ncols  = nz;
1200   if (cols)   *cols   = idx;
1201   if (values) *values = v;
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1206 {
1207   PetscErrorCode ierr;
1208   PetscFunctionBegin;
1209   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1210   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1215 {
1216   PetscErrorCode ierr;
1217   Mat            A;
1218 
1219   PetscFunctionBegin;
1220   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1221   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1222   ierr = MatDestroy(&A);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 /* ---------------------------------------------------------------------------------- */
1227 /*@C
1228   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1229 
1230     [I \otimes S + A \otimes T]
1231 
1232   where
1233     S is a dense (p \times q) matrix
1234     T is a dense (p \times q) matrix
1235     A is an AIJ  (n \times n) matrix
1236     I is the identity matrix
1237   The resulting matrix is (np \times nq)
1238 
1239   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1240   S is always stored independently on all processes as a PetscScalar array in column-major format.
1241 
1242   Collective
1243 
1244   Input Parameters:
1245 + A - the AIJ matrix
1246 . S - the S matrix, stored as a PetscScalar array (column-major)
1247 . T - the T matrix, stored as a PetscScalar array (column-major)
1248 . p - number of rows in S and T
1249 - q - number of columns in S and T
1250 
1251   Output Parameter:
1252 . kaij - the new KAIJ matrix
1253 
1254   Operations provided:
1255 + MatMult
1256 . MatMultAdd
1257 . MatInvertBlockDiagonal
1258 - MatView
1259 
1260   Level: advanced
1261 
1262 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1263 @*/
1264 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1265 {
1266   PetscErrorCode ierr;
1267   PetscMPIInt    size;
1268 
1269   PetscFunctionBegin;
1270   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1271   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1272   if (size == 1) {
1273     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1274   } else {
1275     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1276   }
1277   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1278   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1279   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1280   ierr = MatSetUp(*kaij);
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 /*MC
1285   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1286 
1287     [I \otimes S + A \otimes T]
1288 
1289   where
1290     S is a dense (p \times q) matrix
1291     T is a dense (p \times q) matrix
1292     A is an AIJ  (n \times n) matrix
1293     I is the identity matrix
1294   The resulting matrix is (np \times nq)
1295 
1296   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1297   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
1298 
1299   Operations provided:
1300 + MatMult
1301 . MatMultAdd
1302 . MatInvertBlockDiagonal
1303 - MatView
1304 
1305   Level: advanced
1306 
1307 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1308 M*/
1309 
1310 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1311 {
1312   PetscErrorCode ierr;
1313   Mat_MPIKAIJ    *b;
1314   PetscMPIInt    size;
1315 
1316   PetscFunctionBegin;
1317   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1318   A->data  = (void*)b;
1319 
1320   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1321 
1322   A->ops->setup = MatSetUp_KAIJ;
1323 
1324   b->w    = 0;
1325   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1326   if (size == 1) {
1327     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1328     A->ops->setup               = MatSetUp_KAIJ;
1329     A->ops->destroy             = MatDestroy_SeqKAIJ;
1330     A->ops->view                = MatView_SeqKAIJ;
1331     A->ops->mult                = MatMult_SeqKAIJ_N;
1332     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1333     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1334     A->ops->getrow              = MatGetRow_SeqKAIJ;
1335     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1336     A->ops->sor                 = MatSOR_SeqKAIJ;
1337   } else {
1338     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1339     A->ops->setup               = MatSetUp_KAIJ;
1340     A->ops->destroy             = MatDestroy_MPIKAIJ;
1341     A->ops->view                = MatView_MPIKAIJ;
1342     A->ops->mult                = MatMult_MPIKAIJ_dof;
1343     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1344     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1345     A->ops->getrow              = MatGetRow_MPIKAIJ;
1346     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1347     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1348   }
1349   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1350   PetscFunctionReturn(0);
1351 }
1352