xref: /petsc/src/mat/impls/kaij/kaij.c (revision cea808453c43162dbe7fc67125be59a8aaa4d813)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscFunctionBegin;
145   if (S) *S = NULL;
146   PetscObjectStateIncrease((PetscObject)A);
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
152 
153   Not collective
154 
155   Input Parameter:
156 . A - the KAIJ matrix
157 
158   Output Parameter:
. S - location of pointer to array obtained with MatKAIJGetSRead()
160 
161   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
162   If NULL is passed, it will not attempt to zero the array pointer.
163 
164   Level: advanced
165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
166 @*/
167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
168 {
169   PetscFunctionBegin;
170   if (S) *S = NULL;
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
176 
177    Not Collective; the entire T is stored and returned independently on all processes
178 
179    Input Parameter:
180 .  A - the KAIJ matrix
181 
182    Output Parameter:
183 +  m - the number of rows in T
184 .  n - the number of columns in T
185 -  T - the T matrix, in form of a scalar array in column-major format
186 
187    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
188 
189    Level: advanced
190 
191 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
192 @*/
193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
194 {
195   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
196   PetscFunctionBegin;
197   if (m) *m = b->p;
198   if (n) *n = b->q;
199   if (T) *T = b->T;
200   PetscFunctionReturn(0);
201 }
202 
203 /*@C
204    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
205 
206    Not Collective; the entire T is stored and returned independently on all processes
207 
208    Input Parameter:
209 .  A - the KAIJ matrix
210 
211    Output Parameter:
212 +  m - the number of rows in T
213 .  n - the number of columns in T
214 -  T - the T matrix, in form of a scalar array in column-major format
215 
216    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
217 
218    Level: advanced
219 
220 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
221 @*/
222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
223 {
224   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
225   PetscFunctionBegin;
226   if (m) *m = b->p;
227   if (n) *n = b->q;
228   if (T) *T = b->T;
229   PetscFunctionReturn(0);
230 }
231 
232 /*@C
233   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
234 
235   Not collective
236 
237   Input Parameter:
238 . A - the KAIJ matrix
239 
240   Output Parameter:
. T - location of pointer to array obtained with MatKAIJGetT()
242 
243   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
244   If NULL is passed, it will not attempt to zero the array pointer.
245 
246   Level: advanced
247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
248 @*/
249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
250 {
251   PetscFunctionBegin;
252   if (T) *T = NULL;
253   PetscObjectStateIncrease((PetscObject)A);
254   PetscFunctionReturn(0);
255 }
256 
257 /*@C
258   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
259 
260   Not collective
261 
262   Input Parameter:
263 . A - the KAIJ matrix
264 
265   Output Parameter:
. T - location of pointer to array obtained with MatKAIJGetTRead()
267 
268   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
269   If NULL is passed, it will not attempt to zero the array pointer.
270 
271   Level: advanced
272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
273 @*/
274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
275 {
276   PetscFunctionBegin;
277   if (T) *T = NULL;
278   PetscFunctionReturn(0);
279 }
280 
281 /*@
282    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
283 
284    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
285 
286    Input Parameters:
287 +  A - the KAIJ matrix
288 -  B - the AIJ matrix
289 
290    Notes:
291    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
292    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
293 
294    Level: advanced
295 
296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
297 @*/
298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
299 {
300   PetscErrorCode ierr;
301   PetscMPIInt    size;
302 
303   PetscFunctionBegin;
304   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
305   if (size == 1) {
306     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
307     a->AIJ = B;
308   } else {
309     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
310     a->A = B;
311   }
312   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 /*@C
317    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
318 
319    Logically Collective; the entire S is stored independently on all processes.
320 
321    Input Parameters:
322 +  A - the KAIJ matrix
323 .  p - the number of rows in S
324 .  q - the number of columns in S
325 -  S - the S matrix, in form of a scalar array in column-major format
326 
327    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
328    The S matrix is copied, so the user can destroy this array.
329 
330    Level: Advanced
331 
332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
333 @*/
334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
335 {
336   PetscErrorCode ierr;
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   ierr = PetscFree(a->S);CHKERRQ(ierr);
341   if (S) {
342     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
343     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
353 
354    Logically Collective; the entire T is stored independently on all processes.
355 
356    Input Parameters:
357 +  A - the KAIJ matrix
.  p - the number of rows in T
.  q - the number of columns in T
360 -  T - the T matrix, in form of a scalar array in column-major format
361 
362    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
363    The T matrix is copied, so the user can destroy this array.
364 
365    Level: Advanced
366 
367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
368 @*/
369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
370 {
371   PetscErrorCode ierr;
372   PetscInt       i,j;
373   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
374   PetscBool      isTI = PETSC_FALSE;
375 
376   PetscFunctionBegin;
377   /* check if T is an identity matrix */
378   if (T && (p == q)) {
379     isTI = PETSC_TRUE;
380     for (i=0; i<p; i++) {
381       for (j=0; j<q; j++) {
382         if (i == j) {
383           /* diagonal term must be 1 */
384           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
385         } else {
386           /* off-diagonal term must be 0 */
387           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
388         }
389       }
390     }
391   }
392   a->isTI = isTI;
393 
394   ierr = PetscFree(a->T);CHKERRQ(ierr);
395   if (T && (!isTI)) {
396     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
397     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
398   } else a->T = NULL;
399 
400   a->p = p;
401   a->q = q;
402   PetscFunctionReturn(0);
403 }
404 
/* Destroy hook for MATSEQKAIJ: release every resource owned by the Mat_SeqKAIJ context. */
PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
{
  PetscErrorCode ierr;
  Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;

  PetscFunctionBegin;
  /* Drop the reference to the underlying AIJ matrix taken in MatKAIJSetAIJ() */
  ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
  /* S and T were copied in MatKAIJSetS()/MatKAIJSetT(); PetscFree(NULL) is a no-op */
  ierr = PetscFree(b->S);CHKERRQ(ierr);
  ierr = PetscFree(b->T);CHKERRQ(ierr);
  /* Cached inverted diagonal blocks from MatInvertBlockDiagonal_SeqKAIJ_N() */
  ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
  /* Workspace allocated lazily in MatSOR_SeqKAIJ() (all five in one allocation) */
  ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
419 
420 PetscErrorCode MatSetUp_KAIJ(Mat A)
421 {
422   PetscErrorCode ierr;
423   PetscInt       n;
424   PetscMPIInt    size;
425   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
426 
427   PetscFunctionBegin;
428   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
429   if (size == 1) {
430     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
431     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
432     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
433     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
434     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
435   } else {
436     Mat_MPIKAIJ *a;
437     Mat_MPIAIJ  *mpiaij;
438     IS          from,to;
439     Vec         gvec;
440     PetscScalar *T;
441     PetscInt    i,j;
442 
443     a = (Mat_MPIKAIJ*)A->data;
444     mpiaij = (Mat_MPIAIJ*)a->A->data;
445     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
446     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
447     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
448     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
449     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
450 
451     if (a->isTI) {
452       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
453        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
454        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
455        * to pass in. */
456       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
457       for (i=0; i<a->p; i++) {
458         for (j=0; j<a->q; j++) {
459           if (i==j) T[i+j*a->p] = 1.0;
460           else      T[i+j*a->p] = 0.0;
461         }
462       }
463     } else T = a->T;
464     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
465     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
466     ierr = PetscFree(T);CHKERRQ(ierr);
467 
468     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
469     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
470     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
471     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
472     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
473 
474     /* create two temporary Index sets for build scatter gather */
475     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
476     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
477 
478     /* create temporary global vector to generate scatter context */
479     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
480 
481     /* generate the scatter context */
482     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
483 
484     ierr = ISDestroy(&from);CHKERRQ(ierr);
485     ierr = ISDestroy(&to);CHKERRQ(ierr);
486     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
487   }
488 
489   A->assembled = PETSC_TRUE;
490   PetscFunctionReturn(0);
491 }
492 
493 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
494 {
495   PetscErrorCode ierr;
496   Mat            B;
497 
498   PetscFunctionBegin;
499   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
500   ierr = MatView(B,viewer);CHKERRQ(ierr);
501   ierr = MatDestroy(&B);CHKERRQ(ierr);
502   PetscFunctionReturn(0);
503 }
504 
505 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
506 {
507   PetscErrorCode ierr;
508   Mat            B;
509 
510   PetscFunctionBegin;
511   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
512   ierr = MatView(B,viewer);CHKERRQ(ierr);
513   ierr = MatDestroy(&B);CHKERRQ(ierr);
514   PetscFunctionReturn(0);
515 }
516 
/* Destroy hook for MATMPIKAIJ: release every resource owned by the Mat_MPIKAIJ context. */
PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
{
  PetscErrorCode ierr;
  Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;

  PetscFunctionBegin;
  /* Sequential diagonal/off-diagonal KAIJ submatrices created in MatSetUp_KAIJ() */
  ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
  ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
  /* Drop the reference to the underlying parallel AIJ matrix from MatKAIJSetAIJ() */
  ierr = MatDestroy(&b->A);CHKERRQ(ierr);
  /* Scatter context and ghost work vector created in MatSetUp_KAIJ() */
  ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
  ierr = VecDestroy(&b->w);CHKERRQ(ierr);
  /* S and T were copied in MatKAIJSetS()/MatKAIJSetT(); PetscFree(NULL) is a no-op */
  ierr = PetscFree(b->S);CHKERRQ(ierr);
  ierr = PetscFree(b->T);CHKERRQ(ierr);
  ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
  ierr = PetscFree(A->data);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
534 
535 /* --------------------------------------------------------------------------------------*/
536 
/* zz = yy + Axx, where A = [I \otimes S + AIJ \otimes T]; yy == NULL means zz = Axx */
PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
  Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
  const PetscScalar *s = b->S, *t = b->T;
  const PetscScalar *x,*v,*bx;
  PetscScalar       *y,*sums;
  PetscErrorCode    ierr;
  const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
  PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;

  PetscFunctionBegin;
  /* Initialize the result: zz = yy, or zero when called without an addend */
  if (!yy) {
    ierr = VecSet(zz,0.0);CHKERRQ(ierr);
  } else {
    ierr = VecCopy(yy,zz);CHKERRQ(ierr);
  }
  /* No S, no T, and T is not the implicit identity: the operator is zero */
  if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);

  ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
  /* Raw CSR arrays of the underlying AIJ matrix */
  idx  = a->j;
  v    = a->a;
  ii   = a->i;

  if (b->isTI) {
    /* T == I: zz += (AIJ \otimes I) xx, so each AIJ entry scales a block of x directly */
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;          /* output block for row i */
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
        }
      }
    }
  } else if (t) {
    /* General T: zz += (AIJ \otimes T) xx; t is p-by-q in column-major order */
    for (i=0; i<m; i++) {
      jrow = ii[i];
      n    = ii[i+1] - jrow;
      sums = y + p*i;
      bx   = x + q*i;
      for (j=0; j<n; j++) {
        for (k=0; k<p; k++) {
          for (l=0; l<q; l++) {
            sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
          }
        }
      }
    }
  }
  if (s) {
    /* Shift contribution: zz += (I \otimes S) xx, applied only where the
       identity part exists (row index within the AIJ column dimension) */
    for (i=0; i<m; i++) {
      sums = y + p*i;
      bx   = x + q*i;  /* NOTE(review): computed before the bounds check below; only dereferenced when i < cmap->n -- confirm i*q stays within xx for rectangular cases */
      if (i < b->AIJ->cmap->n) {
        for (j=0; j<q; j++) {
          for (k=0; k<p; k++) {
            sums[k] += s[k+j*p]*bx[j];
          }
        }
      }
    }
  }

  /* Flop count: dense S part plus 2*p flops per stored AIJ nonzero */
  ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
  ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
  ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
608 
609 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
610 {
611   PetscErrorCode ierr;
612   PetscFunctionBegin;
613   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
614   PetscFunctionReturn(0);
615 }
616 
617 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
618 {
619   PetscErrorCode ierr;
620   PetscFunctionBegin;
621   ierr = MatMultAdd_SeqKAIJ(A,xx,yy,zz);CHKERRQ(ierr);
622   PetscFunctionReturn(0);
623 }
624 
625 #include <petsc/private/kernels/blockinvert.h>
626 
627 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
628 {
629   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
630   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
631   const PetscScalar *S  = b->S;
632   const PetscScalar *T  = b->T;
633   const PetscScalar *v  = a->a;
634   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
635   PetscErrorCode    ierr;
636   PetscInt          i,j,*v_pivots,dof,dof2;
637   PetscScalar       *diag,aval,*v_work;
638 
639   PetscFunctionBegin;
640   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
641   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
642 
643   dof  = p;
644   dof2 = dof*dof;
645 
646   if (b->ibdiagvalid) {
647     if (values) *values = b->ibdiag;
648     PetscFunctionReturn(0);
649   }
650   if (!b->ibdiag) {
651     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
652     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
653   }
654   if (values) *values = b->ibdiag;
655   diag = b->ibdiag;
656 
657   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
658   for (i=0; i<m; i++) {
659     if (S) {
660       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
661     } else {
662       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
663     }
664     if (b->isTI) {
665       aval = 0;
666       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
667       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
668     } else if (T) {
669       aval = 0;
670       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
671       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
672     }
673     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
674     diag += dof2;
675   }
676   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
677 
678   b->ibdiagvalid = PETSC_TRUE;
679   PetscFunctionReturn(0);
680 }
681 
682 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
683 {
684   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
685 
686   PetscFunctionBegin;
687   *B = kaij->AIJ;
688   PetscFunctionReturn(0);
689 }
690 
691 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
692 {
693   PetscErrorCode    ierr;
694   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
695   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
696   const PetscScalar *aa = a->a, *T = kaij->T, *v;
697   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
698   const PetscScalar *b, *xb, *idiag;
699   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
700   PetscInt          i, j, k, i2, bs, bs2, nz;
701 
702   PetscFunctionBegin;
703   its = its*lits;
704   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
705   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
706   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
707   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
708   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
709   else        {bs = p; bs2 = bs*bs; }
710 
711   if (!m) PetscFunctionReturn(0);
712 
713   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
714   idiag = kaij->ibdiag;
715   diag  = a->diag;
716 
717   if (!kaij->sor.setup) {
718     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
719     kaij->sor.setup = PETSC_TRUE;
720   }
721   y     = kaij->sor.y;
722   w     = kaij->sor.w;
723   work  = kaij->sor.work;
724   t     = kaij->sor.t;
725   arr   = kaij->sor.arr;
726 
727   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
728   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
729 
730   if (flag & SOR_ZERO_INITIAL_GUESS) {
731     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
732       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
733       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
734       i2     =  bs;
735       idiag  += bs2;
736       for (i=1; i<m; i++) {
737         v  = aa + ai[i];
738         vi = aj + ai[i];
739         nz = diag[i] - ai[i];
740 
741         if (T) {                /* b - T (Arow * x) */
742           for (k=0; k<bs; k++) w[k] = 0;
743           for (j=0; j<nz; j++) {
744             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
745           }
746           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
747           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
748         } else if (kaij->isTI) {
749           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
750           for (j=0; j<nz; j++) {
751             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
752           }
753         } else {
754           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
755         }
756 
757         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
758         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
759 
760         idiag += bs2;
761         i2    += bs;
762       }
763       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
764       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
765       xb = t;
766     } else xb = b;
767     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
768       idiag = kaij->ibdiag+bs2*(m-1);
769       i2    = bs * (m-1);
770       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
771       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
772       i2    -= bs;
773       idiag -= bs2;
774       for (i=m-2; i>=0; i--) {
775         v  = aa + diag[i] + 1 ;
776         vi = aj + diag[i] + 1;
777         nz = ai[i+1] - diag[i] - 1;
778 
779         if (T) {                /* FIXME: This branch untested */
780           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
781           /* copy all rows of x that are needed into contiguous space */
782           workt = work;
783           for (j=0; j<nz; j++) {
784             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
785             workt += bs;
786           }
787           arrt = arr;
788           for (j=0; j<nz; j++) {
789             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
790             for (k=0; k<bs2; k++) arrt[k] *= v[j];
791             arrt += bs2;
792           }
793           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
794         } else if (kaij->isTI) {
795           for (k=0; k<bs; k++) w[k] = t[i2+k];
796           for (j=0; j<nz; j++) {
797             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
798           }
799         }
800 
801         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
802         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
803 
804         idiag -= bs2;
805         i2    -= bs;
806       }
807       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
808     }
809     its--;
810   }
811   while (its--) {               /* FIXME: This branch not updated */
812     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
813       i2     =  0;
814       idiag  = kaij->ibdiag;
815       for (i=0; i<m; i++) {
816         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
817 
818         v  = aa + ai[i];
819         vi = aj + ai[i];
820         nz = diag[i] - ai[i];
821         workt = work;
822         for (j=0; j<nz; j++) {
823           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
824           workt += bs;
825         }
826         arrt = arr;
827         if (T) {
828           for (j=0; j<nz; j++) {
829             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
830             for (k=0; k<bs2; k++) arrt[k] *= v[j];
831             arrt += bs2;
832           }
833           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
834         } else if (kaij->isTI) {
835           for (j=0; j<nz; j++) {
836             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
837             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
838             arrt += bs2;
839           }
840           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
841         }
842         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
843 
844         v  = aa + diag[i] + 1;
845         vi = aj + diag[i] + 1;
846         nz = ai[i+1] - diag[i] - 1;
847         workt = work;
848         for (j=0; j<nz; j++) {
849           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
850           workt += bs;
851         }
852         arrt = arr;
853         if (T) {
854           for (j=0; j<nz; j++) {
855             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
856             for (k=0; k<bs2; k++) arrt[k] *= v[j];
857             arrt += bs2;
858           }
859           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
860         } else if (kaij->isTI) {
861           for (j=0; j<nz; j++) {
862             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
863             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
864             arrt += bs2;
865           }
866           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
867         }
868 
869         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
870         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
871 
872         idiag += bs2;
873         i2    += bs;
874       }
875       xb = t;
876     }
877     else xb = b;
878     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
879       idiag = kaij->ibdiag+bs2*(m-1);
880       i2    = bs * (m-1);
881       if (xb == b) {
882         for (i=m-1; i>=0; i--) {
883           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
884 
885           v  = aa + ai[i];
886           vi = aj + ai[i];
887           nz = diag[i] - ai[i];
888           workt = work;
889           for (j=0; j<nz; j++) {
890             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
891             workt += bs;
892           }
893           arrt = arr;
894           if (T) {
895             for (j=0; j<nz; j++) {
896               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
897               for (k=0; k<bs2; k++) arrt[k] *= v[j];
898               arrt += bs2;
899             }
900             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
901           } else if (kaij->isTI) {
902             for (j=0; j<nz; j++) {
903               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
904               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
905               arrt += bs2;
906             }
907             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
908           }
909 
910           v  = aa + diag[i] + 1;
911           vi = aj + diag[i] + 1;
912           nz = ai[i+1] - diag[i] - 1;
913           workt = work;
914           for (j=0; j<nz; j++) {
915             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
916             workt += bs;
917           }
918           arrt = arr;
919           if (T) {
920             for (j=0; j<nz; j++) {
921               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
922               for (k=0; k<bs2; k++) arrt[k] *= v[j];
923               arrt += bs2;
924             }
925             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
926           } else if (kaij->isTI) {
927             for (j=0; j<nz; j++) {
928               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
929               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
930               arrt += bs2;
931             }
932             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
933           }
934 
935           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
936           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
937         }
938       } else {
939         for (i=m-1; i>=0; i--) {
940           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
941           v  = aa + diag[i] + 1;
942           vi = aj + diag[i] + 1;
943           nz = ai[i+1] - diag[i] - 1;
944           workt = work;
945           for (j=0; j<nz; j++) {
946             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
947             workt += bs;
948           }
949           arrt = arr;
950           if (T) {
951             for (j=0; j<nz; j++) {
952               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
953               for (k=0; k<bs2; k++) arrt[k] *= v[j];
954               arrt += bs2;
955             }
956             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
957           } else if (kaij->isTI) {
958             for (j=0; j<nz; j++) {
959               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
960               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
961               arrt += bs2;
962             }
963             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
964           }
965           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
966           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
967         }
968         idiag -= bs2;
969         i2    -= bs;
970       }
971       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
972     }
973   }
974 
975   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
976   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
977   PetscFunctionReturn(0);
978 }
979 
980 /*===================================================================================*/
981 
982 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
983 {
984   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
985   PetscErrorCode ierr;
986 
987   PetscFunctionBegin;
988   if (!yy) {
989     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
990   } else {
991     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
992   }
993   /* start the scatter */
994   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
995   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
996   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
997   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
998   PetscFunctionReturn(0);
999 }
1000 
1001 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
1002 {
1003   PetscErrorCode ierr;
1004   PetscFunctionBegin;
1005   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
/* Thin wrapper forwarding zz = A*xx + yy to MatMultAdd_MPIKAIJ; registered as
   the multadd operation for MATMPIKAIJ in MatCreate_KAIJ(). */
PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = MatMultAdd_MPIKAIJ(A,xx,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1016 
/* Inverts the block diagonal of a parallel KAIJ matrix by delegating to the
   invertblockdiagonal implementation of the on-process (diagonal) block b->AIJ. */
PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
{
  Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
  PetscErrorCode  ierr;

  PetscFunctionBegin;
  ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1026 
1027 /* ----------------------------------------------------------------*/
1028 
1029 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1030 {
1031   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1032   PetscErrorCode  diag = PETSC_FALSE;
1033   PetscErrorCode  ierr;
1034   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1035   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1036 
1037   PetscFunctionBegin;
1038   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1039   b->getrowactive = PETSC_TRUE;
1040   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1041 
1042   if ((!S) && (!T) && (!b->isTI)) {
1043     if (ncols)    *ncols  = 0;
1044     if (cols)     *cols   = NULL;
1045     if (values)   *values = NULL;
1046     PetscFunctionReturn(0);
1047   }
1048 
1049   if (T || b->isTI) {
1050     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1051     c     = nzaij;
1052     for (i=0; i<nzaij; i++) {
1053       /* check if this row contains a diagonal entry */
1054       if (colsaij[i] == r) {
1055         diag = PETSC_TRUE;
1056         c = i;
1057       }
1058     }
1059   } else nzaij = c = 0;
1060 
1061   /* calculate size of row */
1062   nz = 0;
1063   if (S)            nz += q;
1064   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1065 
1066   if (cols || values) {
1067     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1068     for (i=0; i<q; i++) {
1069       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1070       v[i] = 0.0;
1071     }
1072     if (b->isTI) {
1073       for (i=0; i<nzaij; i++) {
1074         for (j=0; j<q; j++) {
1075           idx[i*q+j] = colsaij[i]*q+j;
1076           v[i*q+j]   = (j==s ? vaij[i] : 0);
1077         }
1078       }
1079     } else if (T) {
1080       for (i=0; i<nzaij; i++) {
1081         for (j=0; j<q; j++) {
1082           idx[i*q+j] = colsaij[i]*q+j;
1083           v[i*q+j]   = vaij[i]*T[s+j*p];
1084         }
1085       }
1086     }
1087     if (S) {
1088       for (j=0; j<q; j++) {
1089         idx[c*q+j] = r*q+j;
1090         v[c*q+j]  += S[s+j*p];
1091       }
1092     }
1093   }
1094 
1095   if (ncols)    *ncols  = nz;
1096   if (cols)     *cols   = idx;
1097   if (values)   *values = v;
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1102 {
1103   PetscErrorCode ierr;
1104   PetscFunctionBegin;
1105   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1106   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1111 {
1112   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1113   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1114   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1115   Mat             AIJ     = b->A;
1116   PetscBool       diag    = PETSC_FALSE;
1117   PetscErrorCode  ierr;
1118   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1119   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1120   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1121 
1122   PetscFunctionBegin;
1123   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1124   b->getrowactive = PETSC_TRUE;
1125   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1126   lrow = row - rstart;
1127 
1128   if ((!S) && (!T) && (!b->isTI)) {
1129     if (ncols)    *ncols  = 0;
1130     if (cols)     *cols   = NULL;
1131     if (values)   *values = NULL;
1132     PetscFunctionReturn(0);
1133   }
1134 
1135   r = lrow/p;
1136   s = lrow%p;
1137 
1138   if (T || b->isTI) {
1139     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1140     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1141     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1142 
1143     c     = ncolsaij + ncolsoaij;
1144     for (i=0; i<ncolsaij; i++) {
1145       /* check if this row contains a diagonal entry */
1146       if (colsaij[i] == r) {
1147         diag = PETSC_TRUE;
1148         c = i;
1149       }
1150     }
1151   } else c = 0;
1152 
1153   /* calculate size of row */
1154   nz = 0;
1155   if (S)            nz += q;
1156   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1157 
1158   if (cols || values) {
1159     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1160     for (i=0; i<q; i++) {
1161       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1162       v[i] = 0.0;
1163     }
1164     if (b->isTI) {
1165       for (i=0; i<ncolsaij; i++) {
1166         for (j=0; j<q; j++) {
1167           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1168           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1169         }
1170       }
1171       for (i=0; i<ncolsoaij; i++) {
1172         for (j=0; j<q; j++) {
1173           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1174           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1175         }
1176       }
1177     } else if (T) {
1178       for (i=0; i<ncolsaij; i++) {
1179         for (j=0; j<q; j++) {
1180           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1181           v[i*q+j]   = vals[i]*T[s+j*p];
1182         }
1183       }
1184       for (i=0; i<ncolsoaij; i++) {
1185         for (j=0; j<q; j++) {
1186           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1187           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1188         }
1189       }
1190     }
1191     if (S) {
1192       for (j=0; j<q; j++) {
1193         idx[c*q+j] = (r+rstart/p)*q+j;
1194         v[c*q+j]  += S[s+j*p];
1195       }
1196     }
1197   }
1198 
1199   if (ncols)  *ncols  = nz;
1200   if (cols)   *cols   = idx;
1201   if (values) *values = v;
1202   PetscFunctionReturn(0);
1203 }
1204 
1205 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1206 {
1207   PetscErrorCode ierr;
1208   PetscFunctionBegin;
1209   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1210   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1215 {
1216   PetscErrorCode ierr;
1217   Mat            A;
1218 
1219   PetscFunctionBegin;
1220   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1221   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1222   ierr = MatDestroy(&A);CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 /* ---------------------------------------------------------------------------------- */
1227 /*@C
1228   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1229 
1230     [I \otimes S + A \otimes T]
1231 
1232   where
1233     S is a dense (p \times q) matrix
1234     T is a dense (p \times q) matrix
1235     A is an AIJ  (n \times n) matrix
1236     I is the identity matrix
1237   The resulting matrix is (np \times nq)
1238 
  The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
  S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1241 
1242   Collective
1243 
  Input Parameters:
+ A - the AIJ matrix
. p - number of rows in S and T
. q - number of columns in S and T
. S - the S matrix, stored as a PetscScalar array (column-major)
- T - the T matrix, stored as a PetscScalar array (column-major)
1250 
1251   Output Parameter:
1252 . kaij - the new KAIJ matrix
1253 
1254   Operations provided:
1255 + MatMult
1256 . MatMultAdd
1257 . MatInvertBlockDiagonal
1258 - MatView
1259 
1260   Level: advanced
1261 
1262 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1263 @*/
1264 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1265 {
1266   PetscErrorCode ierr;
1267   PetscMPIInt    size;
1268 
1269   PetscFunctionBegin;
1270   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1271   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1272   if (size == 1) {
1273     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1274   } else {
1275     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1276   }
1277   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1278   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1279   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1280   ierr = MatSetUp(*kaij);
1281   PetscFunctionReturn(0);
1282 }
1283 
1284 /*MC
1285   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1286 
1287     [I \otimes S + A \otimes T]
1288 
1289   where
1290     S is a dense (p \times q) matrix
1291     T is a dense (p \times q) matrix
1292     A is an AIJ  (n \times n) matrix
1293     I is the identity matrix
1294   The resulting matrix is (np \times nq)
1295 
  The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
  S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1298 
1299   Operations provided:
1300 + MatMult
1301 . MatMultAdd
1302 . MatInvertBlockDiagonal
1303 - MatView
1304 
1305   Level: advanced
1306 
1307 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1308 M*/
1309 
1310 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1311 {
1312   PetscErrorCode ierr;
1313   Mat_MPIKAIJ    *b;
1314   PetscMPIInt    size;
1315 
1316   PetscFunctionBegin;
1317   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1318   A->data  = (void*)b;
1319 
1320   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1321 
1322   A->ops->setup = MatSetUp_KAIJ;
1323 
1324   b->w    = 0;
1325   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1326   if (size == 1) {
1327     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1328     A->ops->setup               = MatSetUp_KAIJ;
1329     A->ops->destroy             = MatDestroy_SeqKAIJ;
1330     A->ops->view                = MatView_SeqKAIJ;
1331     A->ops->mult                = MatMult_SeqKAIJ_N;
1332     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1333     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1334     A->ops->getrow              = MatGetRow_SeqKAIJ;
1335     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1336     A->ops->sor                 = MatSOR_SeqKAIJ;
1337   } else {
1338     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1339     A->ops->setup               = MatSetUp_KAIJ;
1340     A->ops->destroy             = MatDestroy_MPIKAIJ;
1341     A->ops->view                = MatView_MPIKAIJ;
1342     A->ops->mult                = MatMult_MPIKAIJ_dof;
1343     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1344     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1345     A->ops->getrow              = MatGetRow_MPIKAIJ;
1346     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1347     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1348   }
1349   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1350   PetscFunctionReturn(0);
1351 }
1352