xref: /petsc/src/mat/impls/kaij/kaij.c (revision a5b5c723ae2a227082d33696a25cd0d59883d64a)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscFunctionBegin;
145   if (S) *S = NULL;
146   PetscObjectStateIncrease((PetscObject)A);
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
152 
153   Not collective
154 
155   Input Parameter:
156 . A - the KAIJ matrix
157 
158   Output Parameter:
159 . S - location of pointer to array obtained with MatKAIJGetS()
160 
161   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
162   If NULL is passed, it will not attempt to zero the array pointer.
163 
164   Level: advanced
165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
166 @*/
167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
168 {
169   PetscFunctionBegin;
170   if (S) *S = NULL;
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
176 
177    Not Collective; the entire T is stored and returned independently on all processes
178 
179    Input Parameter:
180 .  A - the KAIJ matrix
181 
182    Output Parameter:
183 +  m - the number of rows in T
184 .  n - the number of columns in T
185 -  T - the T matrix, in form of a scalar array in column-major format
186 
187    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
188 
189    Level: advanced
190 
191 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
192 @*/
193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
194 {
195   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
196   PetscFunctionBegin;
197   if (m) *m = b->p;
198   if (n) *n = b->q;
199   if (T) *T = b->T;
200   PetscFunctionReturn(0);
201 }
202 
203 /*@C
204    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
205 
206    Not Collective; the entire T is stored and returned independently on all processes
207 
208    Input Parameter:
209 .  A - the KAIJ matrix
210 
211    Output Parameter:
212 +  m - the number of rows in T
213 .  n - the number of columns in T
214 -  T - the T matrix, in form of a scalar array in column-major format
215 
216    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
217 
218    Level: advanced
219 
220 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
221 @*/
222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
223 {
224   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
225   PetscFunctionBegin;
226   if (m) *m = b->p;
227   if (n) *n = b->q;
228   if (T) *T = b->T;
229   PetscFunctionReturn(0);
230 }
231 
232 /*@C
233   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
234 
235   Not collective
236 
237   Input Parameter:
238 . A - the KAIJ matrix
239 
240   Output Parameter:
241 . T - location of pointer to array obtained with MatKAIJGetS()
242 
243   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
244   If NULL is passed, it will not attempt to zero the array pointer.
245 
246   Level: advanced
247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
248 @*/
249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
250 {
251   PetscFunctionBegin;
252   if (T) *T = NULL;
253   PetscObjectStateIncrease((PetscObject)A);
254   PetscFunctionReturn(0);
255 }
256 
257 /*@C
258   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
259 
260   Not collective
261 
262   Input Parameter:
263 . A - the KAIJ matrix
264 
265   Output Parameter:
266 . T - location of pointer to array obtained with MatKAIJGetS()
267 
268   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
269   If NULL is passed, it will not attempt to zero the array pointer.
270 
271   Level: advanced
272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
273 @*/
274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
275 {
276   PetscFunctionBegin;
277   if (T) *T = NULL;
278   PetscFunctionReturn(0);
279 }
280 
281 /*@
282    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
283 
284    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
285 
286    Input Parameters:
287 +  A - the KAIJ matrix
288 -  B - the AIJ matrix
289 
290    Level: advanced
291 
292 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
293 @*/
294 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
295 {
296   PetscErrorCode ierr;
297   PetscMPIInt    size;
298 
299   PetscFunctionBegin;
300   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
301   if (size == 1) {
302     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
303     a->AIJ = B;
304   } else {
305     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
306     a->A = B;
307   }
308   PetscFunctionReturn(0);
309 }
310 
311 /*@C
312    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
313 
314    Logically Collective; the entire S is stored independently on all processes.
315 
316    Input Parameters:
317 +  A - the KAIJ matrix
318 .  p - the number of rows in S
319 .  q - the number of columns in S
320 -  S - the S matrix, in form of a scalar array in column-major format
321 
322    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
323 
324    Level: Advanced
325 
326 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
327 @*/
328 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
329 {
330   PetscErrorCode ierr;
331   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
332 
333   PetscFunctionBegin;
334   ierr = PetscFree(a->S);CHKERRQ(ierr);
335   if (S) {
336     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
337     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
338   } else  a->S = NULL;
339 
340   a->p = p;
341   a->q = q;
342   PetscFunctionReturn(0);
343 }
344 
345 /*@C
346    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
347 
348    Logically Collective; the entire T is stored independently on all processes.
349 
350    Input Parameters:
351 +  A - the KAIJ matrix
352 .  p - the number of rows in S
353 .  q - the number of columns in S
354 -  T - the T matrix, in form of a scalar array in column-major format
355 
356    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
357 
358    Level: Advanced
359 
360 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
361 @*/
362 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
363 {
364   PetscErrorCode ierr;
365   PetscInt       i,j;
366   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
367   PetscBool      isTI = PETSC_FALSE;
368 
369   PetscFunctionBegin;
370   /* check if T is an identity matrix */
371   if (T && (p == q)) {
372     isTI = PETSC_TRUE;
373     for (i=0; i<p; i++) {
374       for (j=0; j<q; j++) {
375         if (i == j) {
376           /* diagonal term must be 1 */
377           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
378         } else {
379           /* off-diagonal term must be 0 */
380           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
381         }
382       }
383     }
384   }
385   a->isTI = isTI;
386 
387   ierr = PetscFree(a->T);CHKERRQ(ierr);
388   if (T && (!isTI)) {
389     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
390     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
391   } else a->T = NULL;
392 
393   a->p = p;
394   a->q = q;
395   PetscFunctionReturn(0);
396 }
397 
398 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
399 {
400   PetscErrorCode ierr;
401   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
402 
403   PetscFunctionBegin;
404   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
405   ierr = PetscFree(b->S);CHKERRQ(ierr);
406   ierr = PetscFree(b->T);CHKERRQ(ierr);
407   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
408   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
409   ierr = PetscFree(A->data);CHKERRQ(ierr);
410   PetscFunctionReturn(0);
411 }
412 
413 PetscErrorCode MatSetUp_KAIJ(Mat A)
414 {
415   PetscErrorCode ierr;
416   PetscInt       n;
417   PetscMPIInt    size;
418   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
419 
420   PetscFunctionBegin;
421   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
422   if (size == 1) {
423     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
424     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
425     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
426     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
427     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
428     ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr);
429   } else {
430     Mat_MPIKAIJ *a;
431     Mat_MPIAIJ  *mpiaij;
432     IS          from,to;
433     Vec         gvec;
434     PetscScalar *T;
435     PetscInt    i,j;
436 
437     a = (Mat_MPIKAIJ*)A->data;
438     mpiaij = (Mat_MPIAIJ*)a->A->data;
439     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
440     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
441     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
442     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
443     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
444     ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr);
445 
446     if (a->isTI) {
447       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
448        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
449        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
450        * to pass in. */
451       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
452       for (i=0; i<a->p; i++) {
453         for (j=0; j<a->q; j++) {
454           if (i==j) T[i+j*a->p] = 1.0;
455           else      T[i+j*a->p] = 0.0;
456         }
457       }
458     } else T = a->T;
459     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
460     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
461     ierr = PetscFree(T);CHKERRQ(ierr);
462 
463     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
464     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
465     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
466     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
467     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
468 
469     /* create two temporary Index sets for build scatter gather */
470     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
471     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
472 
473     /* create temporary global vector to generate scatter context */
474     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
475 
476     /* generate the scatter context */
477     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
478 
479     ierr = ISDestroy(&from);CHKERRQ(ierr);
480     ierr = ISDestroy(&to);CHKERRQ(ierr);
481     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
482   }
483 
484   A->assembled = PETSC_TRUE;
485   PetscFunctionReturn(0);
486 }
487 
488 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
489 {
490   PetscErrorCode ierr;
491   Mat            B;
492 
493   PetscFunctionBegin;
494   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
495   ierr = MatView(B,viewer);CHKERRQ(ierr);
496   ierr = MatDestroy(&B);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
501 {
502   PetscErrorCode ierr;
503   Mat            B;
504 
505   PetscFunctionBegin;
506   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
507   ierr = MatView(B,viewer);CHKERRQ(ierr);
508   ierr = MatDestroy(&B);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
513 {
514   PetscErrorCode ierr;
515   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
516 
517   PetscFunctionBegin;
518   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
519   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
520   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
521   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
522   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
523   ierr = PetscFree(b->S);CHKERRQ(ierr);
524   ierr = PetscFree(b->T);CHKERRQ(ierr);
525   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
526   ierr = PetscFree(A->data);CHKERRQ(ierr);
527   PetscFunctionReturn(0);
528 }
529 
530 /* --------------------------------------------------------------------------------------*/
531 
532 /* zz = yy + Axx */
533 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
534 {
535   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
536   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
537   const PetscScalar *s = b->S, *t = b->T;
538   const PetscScalar *x,*v,*bx;
539   PetscScalar       *y,*sums;
540   PetscErrorCode    ierr;
541   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
542   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
543 
544   PetscFunctionBegin;
545   if (!yy) {
546     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
547   } else {
548     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
549   }
550   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
551 
552   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
553   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
554   idx  = a->j;
555   v    = a->a;
556   ii   = a->i;
557 
558   if (b->isTI) {
559     for (i=0; i<m; i++) {
560       jrow = ii[i];
561       n    = ii[i+1] - jrow;
562       sums = y + p*i;
563       for (j=0; j<n; j++) {
564         for (k=0; k<p; k++) {
565           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
566         }
567       }
568     }
569   } else if (t) {
570     for (i=0; i<m; i++) {
571       jrow = ii[i];
572       n    = ii[i+1] - jrow;
573       sums = y + p*i;
574       bx   = x + q*i;
575       for (j=0; j<n; j++) {
576         for (k=0; k<p; k++) {
577           for (l=0; l<q; l++) {
578             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
579           }
580         }
581       }
582     }
583   }
584   if (s) {
585     for (i=0; i<m; i++) {
586       jrow = ii[i];
587       n    = ii[i+1] - jrow;
588       sums = y + p*i;
589       bx   = x + q*i;
590       if (i < b->AIJ->cmap->n) {
591         for (j=0; j<q; j++) {
592           for (k=0; k<p; k++) {
593             sums[k] += s[k+j*p]*bx[j];
594           }
595         }
596       }
597     }
598   }
599 
600   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
601   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
602   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
603   PetscFunctionReturn(0);
604 }
605 
606 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
607 {
608   PetscErrorCode ierr;
609   PetscFunctionBegin;
610   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
611   PetscFunctionReturn(0);
612 }
613 
614 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
615 {
616   PetscErrorCode ierr;
617   PetscFunctionBegin;
618   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
619   PetscFunctionReturn(0);
620 }
621 
622 #include <petsc/private/kernels/blockinvert.h>
623 
624 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
625 {
626   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
627   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
628   const PetscScalar *S  = b->S;
629   const PetscScalar *T  = b->T;
630   const PetscScalar *v  = a->a;
631   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
632   PetscErrorCode    ierr;
633   PetscInt          i,j,*v_pivots,dof,dof2;
634   PetscScalar       *diag,aval,*v_work;
635 
636   PetscFunctionBegin;
637   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
638   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
639 
640   dof  = p;
641   dof2 = dof*dof;
642 
643   if (b->ibdiagvalid) {
644     if (values) *values = b->ibdiag;
645     PetscFunctionReturn(0);
646   }
647   if (!b->ibdiag) {
648     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
649     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
650   }
651   if (values) *values = b->ibdiag;
652   diag = b->ibdiag;
653 
654   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
655   for (i=0; i<m; i++) {
656     if (S) {
657       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
658     } else {
659       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
660     }
661     if (b->isTI) {
662       aval = 0;
663       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
664       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
665     } else if (T) {
666       aval = 0;
667       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
668       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
669     }
670     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
671     diag += dof2;
672   }
673   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
674 
675   b->ibdiagvalid = PETSC_TRUE;
676   PetscFunctionReturn(0);
677 }
678 
679 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
680 {
681   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
682 
683   PetscFunctionBegin;
684   *B = kaij->AIJ;
685   PetscFunctionReturn(0);
686 }
687 
688 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
689 {
690   PetscErrorCode    ierr;
691   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
692   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
693   const PetscScalar *aa = a->a, *T = kaij->T, *v;
694   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
695   const PetscScalar *b, *xb, *idiag;
696   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
697   PetscInt          i, j, k, i2, bs, bs2, nz;
698 
699   PetscFunctionBegin;
700   its = its*lits;
701   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
702   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
703   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
704   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
705   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
706   else        {bs = p; bs2 = bs*bs; }
707 
708   if (!m) PetscFunctionReturn(0);
709 
710   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
711   idiag = kaij->ibdiag;
712   diag  = a->diag;
713 
714   if (!kaij->sor.setup) {
715     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
716     kaij->sor.setup = PETSC_TRUE;
717   }
718   y     = kaij->sor.y;
719   w     = kaij->sor.w;
720   work  = kaij->sor.work;
721   t     = kaij->sor.t;
722   arr   = kaij->sor.arr;
723 
724   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
725   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
726 
727   if (flag & SOR_ZERO_INITIAL_GUESS) {
728     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
729       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
730       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
731       i2     =  bs;
732       idiag  += bs2;
733       for (i=1; i<m; i++) {
734         v  = aa + ai[i];
735         vi = aj + ai[i];
736         nz = diag[i] - ai[i];
737 
738         if (T) {                /* b - T (Arow * x) */
739           for (k=0; k<bs; k++) w[k] = 0;
740           for (j=0; j<nz; j++) {
741             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
742           }
743           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
744           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
745         } else if (kaij->isTI) {
746           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
747           for (j=0; j<nz; j++) {
748             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
749           }
750         } else {
751           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
752         }
753 
754         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
755         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
756 
757         idiag += bs2;
758         i2    += bs;
759       }
760       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
761       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
762       xb = t;
763     } else xb = b;
764     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
765       idiag = kaij->ibdiag+bs2*(m-1);
766       i2    = bs * (m-1);
767       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
768       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
769       i2    -= bs;
770       idiag -= bs2;
771       for (i=m-2; i>=0; i--) {
772         v  = aa + diag[i] + 1 ;
773         vi = aj + diag[i] + 1;
774         nz = ai[i+1] - diag[i] - 1;
775 
776         if (T) {                /* FIXME: This branch untested */
777           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
778           /* copy all rows of x that are needed into contiguous space */
779           workt = work;
780           for (j=0; j<nz; j++) {
781             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
782             workt += bs;
783           }
784           arrt = arr;
785           for (j=0; j<nz; j++) {
786             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
787             for (k=0; k<bs2; k++) arrt[k] *= v[j];
788             arrt += bs2;
789           }
790           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
791         } else if (kaij->isTI) {
792           for (k=0; k<bs; k++) w[k] = t[i2+k];
793           for (j=0; j<nz; j++) {
794             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
795           }
796         }
797 
798         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
799         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
800 
801         idiag -= bs2;
802         i2    -= bs;
803       }
804       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
805     }
806     its--;
807   }
808   while (its--) {               /* FIXME: This branch not updated */
809     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
810       i2     =  0;
811       idiag  = kaij->ibdiag;
812       for (i=0; i<m; i++) {
813         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
814 
815         v  = aa + ai[i];
816         vi = aj + ai[i];
817         nz = diag[i] - ai[i];
818         workt = work;
819         for (j=0; j<nz; j++) {
820           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
821           workt += bs;
822         }
823         arrt = arr;
824         if (T) {
825           for (j=0; j<nz; j++) {
826             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
827             for (k=0; k<bs2; k++) arrt[k] *= v[j];
828             arrt += bs2;
829           }
830           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
831         } else if (kaij->isTI) {
832           for (j=0; j<nz; j++) {
833             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
834             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
835             arrt += bs2;
836           }
837           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
838         }
839         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
840 
841         v  = aa + diag[i] + 1;
842         vi = aj + diag[i] + 1;
843         nz = ai[i+1] - diag[i] - 1;
844         workt = work;
845         for (j=0; j<nz; j++) {
846           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
847           workt += bs;
848         }
849         arrt = arr;
850         if (T) {
851           for (j=0; j<nz; j++) {
852             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
853             for (k=0; k<bs2; k++) arrt[k] *= v[j];
854             arrt += bs2;
855           }
856           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
857         } else if (kaij->isTI) {
858           for (j=0; j<nz; j++) {
859             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
860             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
861             arrt += bs2;
862           }
863           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
864         }
865 
866         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
867         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
868 
869         idiag += bs2;
870         i2    += bs;
871       }
872       xb = t;
873     }
874     else xb = b;
875     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
876       idiag = kaij->ibdiag+bs2*(m-1);
877       i2    = bs * (m-1);
878       if (xb == b) {
879         for (i=m-1; i>=0; i--) {
880           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
881 
882           v  = aa + ai[i];
883           vi = aj + ai[i];
884           nz = diag[i] - ai[i];
885           workt = work;
886           for (j=0; j<nz; j++) {
887             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
888             workt += bs;
889           }
890           arrt = arr;
891           if (T) {
892             for (j=0; j<nz; j++) {
893               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
894               for (k=0; k<bs2; k++) arrt[k] *= v[j];
895               arrt += bs2;
896             }
897             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
898           } else if (kaij->isTI) {
899             for (j=0; j<nz; j++) {
900               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
901               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
902               arrt += bs2;
903             }
904             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
905           }
906 
907           v  = aa + diag[i] + 1;
908           vi = aj + diag[i] + 1;
909           nz = ai[i+1] - diag[i] - 1;
910           workt = work;
911           for (j=0; j<nz; j++) {
912             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
913             workt += bs;
914           }
915           arrt = arr;
916           if (T) {
917             for (j=0; j<nz; j++) {
918               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
919               for (k=0; k<bs2; k++) arrt[k] *= v[j];
920               arrt += bs2;
921             }
922             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
923           } else if (kaij->isTI) {
924             for (j=0; j<nz; j++) {
925               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
926               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
927               arrt += bs2;
928             }
929             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
930           }
931 
932           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
933           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
934         }
935       } else {
936         for (i=m-1; i>=0; i--) {
937           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
938           v  = aa + diag[i] + 1;
939           vi = aj + diag[i] + 1;
940           nz = ai[i+1] - diag[i] - 1;
941           workt = work;
942           for (j=0; j<nz; j++) {
943             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
944             workt += bs;
945           }
946           arrt = arr;
947           if (T) {
948             for (j=0; j<nz; j++) {
949               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
950               for (k=0; k<bs2; k++) arrt[k] *= v[j];
951               arrt += bs2;
952             }
953             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
954           } else if (kaij->isTI) {
955             for (j=0; j<nz; j++) {
956               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
957               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
958               arrt += bs2;
959             }
960             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
961           }
962           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
963           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
964         }
965         idiag -= bs2;
966         i2    -= bs;
967       }
968       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
969     }
970   }
971 
972   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
973   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
974   PetscFunctionReturn(0);
975 }
976 
977 /*===================================================================================*/
978 
979 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
980 {
981   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
982   PetscErrorCode ierr;
983 
984   PetscFunctionBegin;
985   if (!yy) {
986     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
987   } else {
988     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
989   }
990   /* start the scatter */
991   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
992   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
993   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
994   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
995   PetscFunctionReturn(0);
996 }
997 
998 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
999 {
1000   PetscErrorCode ierr;
1001   PetscFunctionBegin;
1002   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
1007 {
1008   PetscErrorCode ierr;
1009   PetscFunctionBegin;
1010   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
1015 {
1016   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1017   PetscErrorCode  ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 /* ----------------------------------------------------------------*/
1025 
1026 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1027 {
1028   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1029   PetscErrorCode  diag = PETSC_FALSE;
1030   PetscErrorCode  ierr;
1031   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1032   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1033 
1034   PetscFunctionBegin;
1035   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1036   b->getrowactive = PETSC_TRUE;
1037   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1038 
1039   if ((!S) && (!T) && (!b->isTI)) {
1040     if (ncols)    *ncols  = 0;
1041     if (cols)     *cols   = NULL;
1042     if (values)   *values = NULL;
1043     PetscFunctionReturn(0);
1044   }
1045 
1046   if (T || b->isTI) {
1047     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1048     c     = nzaij;
1049     for (i=0; i<nzaij; i++) {
1050       /* check if this row contains a diagonal entry */
1051       if (colsaij[i] == r) {
1052         diag = PETSC_TRUE;
1053         c = i;
1054       }
1055     }
1056   } else nzaij = c = 0;
1057 
1058   /* calculate size of row */
1059   nz = 0;
1060   if (S)            nz += q;
1061   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1062 
1063   if (cols || values) {
1064     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1065     for (i=0; i<q; i++) {
1066       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1067       v[i] = 0.0;
1068     }
1069     if (b->isTI) {
1070       for (i=0; i<nzaij; i++) {
1071         for (j=0; j<q; j++) {
1072           idx[i*q+j] = colsaij[i]*q+j;
1073           v[i*q+j]   = (j==s ? vaij[i] : 0);
1074         }
1075       }
1076     } else if (T) {
1077       for (i=0; i<nzaij; i++) {
1078         for (j=0; j<q; j++) {
1079           idx[i*q+j] = colsaij[i]*q+j;
1080           v[i*q+j]   = vaij[i]*T[s+j*p];
1081         }
1082       }
1083     }
1084     if (S) {
1085       for (j=0; j<q; j++) {
1086         idx[c*q+j] = r*q+j;
1087         v[c*q+j]  += S[s+j*p];
1088       }
1089     }
1090   }
1091 
1092   if (ncols)    *ncols  = nz;
1093   if (cols)     *cols   = idx;
1094   if (values)   *values = v;
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1099 {
1100   PetscErrorCode ierr;
1101   PetscFunctionBegin;
1102   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1103   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1108 {
1109   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1110   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1111   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1112   Mat             AIJ     = b->A;
1113   PetscBool       diag    = PETSC_FALSE;
1114   PetscErrorCode  ierr;
1115   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1116   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1117   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1118 
1119   PetscFunctionBegin;
1120   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1121   b->getrowactive = PETSC_TRUE;
1122   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1123   lrow = row - rstart;
1124 
1125   if ((!S) && (!T) && (!b->isTI)) {
1126     if (ncols)    *ncols  = 0;
1127     if (cols)     *cols   = NULL;
1128     if (values)   *values = NULL;
1129     PetscFunctionReturn(0);
1130   }
1131 
1132   r = lrow/p;
1133   s = lrow%p;
1134 
1135   if (T || b->isTI) {
1136     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1137     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1138     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1139 
1140     c     = ncolsaij + ncolsoaij;
1141     for (i=0; i<ncolsaij; i++) {
1142       /* check if this row contains a diagonal entry */
1143       if (colsaij[i] == r) {
1144         diag = PETSC_TRUE;
1145         c = i;
1146       }
1147     }
1148   } else c = 0;
1149 
1150   /* calculate size of row */
1151   nz = 0;
1152   if (S)            nz += q;
1153   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1154 
1155   if (cols || values) {
1156     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1157     for (i=0; i<q; i++) {
1158       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1159       v[i] = 0.0;
1160     }
1161     if (b->isTI) {
1162       for (i=0; i<ncolsaij; i++) {
1163         for (j=0; j<q; j++) {
1164           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1165           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1166         }
1167       }
1168       for (i=0; i<ncolsoaij; i++) {
1169         for (j=0; j<q; j++) {
1170           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1171           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1172         }
1173       }
1174     } else if (T) {
1175       for (i=0; i<ncolsaij; i++) {
1176         for (j=0; j<q; j++) {
1177           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1178           v[i*q+j]   = vals[i]*T[s+j*p];
1179         }
1180       }
1181       for (i=0; i<ncolsoaij; i++) {
1182         for (j=0; j<q; j++) {
1183           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1184           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1185         }
1186       }
1187     }
1188     if (S) {
1189       for (j=0; j<q; j++) {
1190         idx[c*q+j] = (r+rstart/p)*q+j;
1191         v[c*q+j]  += S[s+j*p];
1192       }
1193     }
1194   }
1195 
1196   if (ncols)  *ncols  = nz;
1197   if (cols)   *cols   = idx;
1198   if (values) *values = v;
1199   PetscFunctionReturn(0);
1200 }
1201 
1202 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1203 {
1204   PetscErrorCode ierr;
1205   PetscFunctionBegin;
1206   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1207   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1212 {
1213   PetscErrorCode ierr;
1214   Mat            A;
1215 
1216   PetscFunctionBegin;
1217   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1218   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1219   ierr = MatDestroy(&A);CHKERRQ(ierr);
1220   PetscFunctionReturn(0);
1221 }
1222 
1223 /* ---------------------------------------------------------------------------------- */
1224 /*@C
1225   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1226 
1227     [I \otimes S + A \otimes T]
1228 
1229   where
1230     S is a dense (p \times q) matrix
1231     T is a dense (p \times q) matrix
1232     A is an AIJ  (n \times n) matrix
1233     I is the identity matrix
1234   The resulting matrix is (np \times nq)
1235 
1236   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1237   S is always stored independently on all processes as a PetscScalar array in column-major format.
1238 
1239   Collective
1240 
1241   Input Parameters:
1242 + A - the AIJ matrix
1243 . S - the S matrix, stored as a PetscScalar array (column-major)
1244 . T - the T matrix, stored as a PetscScalar array (column-major)
1245 . p - number of rows in S and T
1246 - q - number of columns in S and T
1247 
1248   Output Parameter:
1249 . kaij - the new KAIJ matrix
1250 
1251   Operations provided:
1252 + MatMult
1253 . MatMultAdd
1254 . MatInvertBlockDiagonal
1255 - MatView
1256 
1257   Level: advanced
1258 
1259 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1260 @*/
1261 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1262 {
1263   PetscErrorCode ierr;
1264   PetscMPIInt    size;
1265 
1266   PetscFunctionBegin;
1267   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1268   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1269   if (size == 1) {
1270     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1271   } else {
1272     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1273   }
1274   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1275   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1276   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1277   ierr = MatSetUp(*kaij);
1278   PetscFunctionReturn(0);
1279 }
1280 
1281 /*MC
1282   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1283 
1284     [I \otimes S + A \otimes T]
1285 
1286   where
1287     S is a dense (p \times q) matrix
1288     T is a dense (p \times q) matrix
1289     A is an AIJ  (n \times n) matrix
1290     I is the identity matrix
1291   The resulting matrix is (np \times nq)
1292 
1293   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1294   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
1295 
1296   Operations provided:
1297 + MatMult
1298 . MatMultAdd
1299 . MatInvertBlockDiagonal
1300 - MatView
1301 
1302   Level: advanced
1303 
1304 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1305 M*/
1306 
1307 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1308 {
1309   PetscErrorCode ierr;
1310   Mat_MPIKAIJ    *b;
1311   PetscMPIInt    size;
1312 
1313   PetscFunctionBegin;
1314   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1315   A->data  = (void*)b;
1316 
1317   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1318 
1319   A->ops->setup = MatSetUp_KAIJ;
1320 
1321   b->w    = 0;
1322   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1323   if (size == 1) {
1324     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1325     A->ops->setup               = MatSetUp_KAIJ;
1326     A->ops->destroy             = MatDestroy_SeqKAIJ;
1327     A->ops->view                = MatView_SeqKAIJ;
1328     A->ops->mult                = MatMult_SeqKAIJ_N;
1329     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1330     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1331     A->ops->getrow              = MatGetRow_SeqKAIJ;
1332     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1333     A->ops->sor                 = MatSOR_SeqKAIJ;
1334   } else {
1335     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1336     A->ops->setup               = MatSetUp_KAIJ;
1337     A->ops->destroy             = MatDestroy_MPIKAIJ;
1338     A->ops->view                = MatView_MPIKAIJ;
1339     A->ops->mult                = MatMult_MPIKAIJ_dof;
1340     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1341     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1342     A->ops->getrow              = MatGetRow_MPIKAIJ;
1343     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1344     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1345   }
1346   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1347   PetscFunctionReturn(0);
1348 }
1349