xref: /petsc/src/mat/impls/kaij/kaij.c (revision d3b90ce11b7685e8b68db97b4b6cc86e3ded9193)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameters:
76 +  m - the number of rows in S
77 .  n - the number of columns in S
78 -  S - the S matrix, in form of a scalar array in column-major format
79 
80    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode MatKAIJGetS(Mat A,PetscInt *m,PetscInt *n,PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   if (m) *m = b->p;
91   if (n) *n = b->q;
92   if (S) *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetSRead - Get a read-only pointer to the S matrix describing the shift action of the KAIJ matrix
98 
99    Not Collective; the entire S is stored and returned independently on all processes.
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameters:
105 +  m - the number of rows in S
106 .  n - the number of columns in S
107 -  S - the S matrix, in form of a scalar array in column-major format
108 
109    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode MatKAIJGetSRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **S)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   if (m) *m = b->p;
120   if (n) *n = b->q;
121   if (S) *S = b->S;
122   PetscFunctionReturn(0);
123 }
124 
125 /*@C
126   MatKAIJRestoreS - Restore array obtained with MatKAIJGetS()
127 
128   Not collective
129 
130   Input Parameter:
131 . A - the KAIJ matrix
132 
133   Output Parameter:
134 . S - location of pointer to array obtained with MatKAIJGetS()
135 
136   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
137   If NULL is passed, it will not attempt to zero the array pointer.
138 
139   Level: advanced
140 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
141 @*/
142 PetscErrorCode MatKAIJRestoreS(Mat A,PetscScalar **S)
143 {
144   PetscFunctionBegin;
145   if (S) *S = NULL;
146   PetscObjectStateIncrease((PetscObject)A);
147   PetscFunctionReturn(0);
148 }
149 
150 /*@C
151   MatKAIJRestoreSRead - Restore array obtained with MatKAIJGetSRead()
152 
153   Not collective
154 
155   Input Parameter:
156 . A - the KAIJ matrix
157 
158   Output Parameter:
159 . S - location of pointer to array obtained with MatKAIJGetS()
160 
161   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
162   If NULL is passed, it will not attempt to zero the array pointer.
163 
164   Level: advanced
165 .seealso: MatKAIJGetS(), MatKAIJGetSRead(), MatKAIJRestoreSRead()
166 @*/
167 PetscErrorCode MatKAIJRestoreSRead(Mat A,const PetscScalar **S)
168 {
169   PetscFunctionBegin;
170   if (S) *S = NULL;
171   PetscFunctionReturn(0);
172 }
173 
174 /*@C
175    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
176 
177    Not Collective; the entire T is stored and returned independently on all processes
178 
179    Input Parameter:
180 .  A - the KAIJ matrix
181 
182    Output Parameter:
183 +  m - the number of rows in T
184 .  n - the number of columns in T
185 -  T - the T matrix, in form of a scalar array in column-major format
186 
187    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
188 
189    Level: advanced
190 
191 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
192 @*/
193 PetscErrorCode MatKAIJGetT(Mat A,PetscInt *m,PetscInt *n,PetscScalar **T)
194 {
195   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
196   PetscFunctionBegin;
197   if (m) *m = b->p;
198   if (n) *n = b->q;
199   if (T) *T = b->T;
200   PetscFunctionReturn(0);
201 }
202 
203 /*@C
204    MatKAIJGetTRead - Get a read-only pointer to the transformation matrix T associated with the KAIJ matrix
205 
206    Not Collective; the entire T is stored and returned independently on all processes
207 
208    Input Parameter:
209 .  A - the KAIJ matrix
210 
211    Output Parameter:
212 +  m - the number of rows in T
213 .  n - the number of columns in T
214 -  T - the T matrix, in form of a scalar array in column-major format
215 
216    Note: All output parameters are optional (pass NULL or PETSC_IGNORE if not desired)
217 
218    Level: advanced
219 
220 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
221 @*/
222 PetscErrorCode MatKAIJGetTRead(Mat A,PetscInt *m,PetscInt *n,const PetscScalar **T)
223 {
224   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
225   PetscFunctionBegin;
226   if (m) *m = b->p;
227   if (n) *n = b->q;
228   if (T) *T = b->T;
229   PetscFunctionReturn(0);
230 }
231 
232 /*@C
233   MatKAIJRestoreT - Restore array obtained with MatKAIJGetT()
234 
235   Not collective
236 
237   Input Parameter:
238 . A - the KAIJ matrix
239 
240   Output Parameter:
241 . T - location of pointer to array obtained with MatKAIJGetS()
242 
243   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
244   If NULL is passed, it will not attempt to zero the array pointer.
245 
246   Level: advanced
247 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
248 @*/
249 PetscErrorCode MatKAIJRestoreT(Mat A,PetscScalar **T)
250 {
251   PetscFunctionBegin;
252   if (T) *T = NULL;
253   PetscObjectStateIncrease((PetscObject)A);
254   PetscFunctionReturn(0);
255 }
256 
257 /*@C
258   MatKAIJRestoreTRead - Restore array obtained with MatKAIJGetTRead()
259 
260   Not collective
261 
262   Input Parameter:
263 . A - the KAIJ matrix
264 
265   Output Parameter:
266 . T - location of pointer to array obtained with MatKAIJGetS()
267 
268   Note: This routine zeros the array pointer to prevent accidental reuse after it has been restored.
269   If NULL is passed, it will not attempt to zero the array pointer.
270 
271   Level: advanced
272 .seealso: MatKAIJGetT(), MatKAIJGetTRead(), MatKAIJRestoreTRead()
273 @*/
274 PetscErrorCode MatKAIJRestoreTRead(Mat A,const PetscScalar **T)
275 {
276   PetscFunctionBegin;
277   if (T) *T = NULL;
278   PetscFunctionReturn(0);
279 }
280 
281 /*@
282    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
283 
284    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
285 
286    Input Parameters:
287 +  A - the KAIJ matrix
288 -  B - the AIJ matrix
289 
290    Notes:
291    This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
292    Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
293 
294    Level: advanced
295 
296 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
297 @*/
298 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
299 {
300   PetscErrorCode ierr;
301   PetscMPIInt    size;
302 
303   PetscFunctionBegin;
304   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
305   if (size == 1) {
306     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
307     a->AIJ = B;
308   } else {
309     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
310     a->A = B;
311   }
312   ierr = PetscObjectReference((PetscObject)B);CHKERRQ(ierr);
313   PetscFunctionReturn(0);
314 }
315 
316 /*@C
317    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
318 
319    Logically Collective; the entire S is stored independently on all processes.
320 
321    Input Parameters:
322 +  A - the KAIJ matrix
323 .  p - the number of rows in S
324 .  q - the number of columns in S
325 -  S - the S matrix, in form of a scalar array in column-major format
326 
327    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
328    The S matrix is copied, so the user can destroy this array.
329 
330    Level: Advanced
331 
332 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
333 @*/
334 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
335 {
336   PetscErrorCode ierr;
337   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
338 
339   PetscFunctionBegin;
340   ierr = PetscFree(a->S);CHKERRQ(ierr);
341   if (S) {
342     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
343     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
344   } else  a->S = NULL;
345 
346   a->p = p;
347   a->q = q;
348   PetscFunctionReturn(0);
349 }
350 
351 /*@C
352    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
353 
354    Logically Collective; the entire T is stored independently on all processes.
355 
356    Input Parameters:
357 +  A - the KAIJ matrix
358 .  p - the number of rows in S
359 .  q - the number of columns in S
360 -  T - the T matrix, in form of a scalar array in column-major format
361 
362    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
363    The T matrix is copied, so the user can destroy this array.
364 
365    Level: Advanced
366 
367 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
368 @*/
369 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
370 {
371   PetscErrorCode ierr;
372   PetscInt       i,j;
373   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
374   PetscBool      isTI = PETSC_FALSE;
375 
376   PetscFunctionBegin;
377   /* check if T is an identity matrix */
378   if (T && (p == q)) {
379     isTI = PETSC_TRUE;
380     for (i=0; i<p; i++) {
381       for (j=0; j<q; j++) {
382         if (i == j) {
383           /* diagonal term must be 1 */
384           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
385         } else {
386           /* off-diagonal term must be 0 */
387           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
388         }
389       }
390     }
391   }
392   a->isTI = isTI;
393 
394   ierr = PetscFree(a->T);CHKERRQ(ierr);
395   if (T && (!isTI)) {
396     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
397     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
398   } else a->T = NULL;
399 
400   a->p = p;
401   a->q = q;
402   PetscFunctionReturn(0);
403 }
404 
405 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
406 {
407   PetscErrorCode ierr;
408   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
409 
410   PetscFunctionBegin;
411   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
412   ierr = PetscFree(b->S);CHKERRQ(ierr);
413   ierr = PetscFree(b->T);CHKERRQ(ierr);
414   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
415   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
416   ierr = PetscFree(A->data);CHKERRQ(ierr);
417   PetscFunctionReturn(0);
418 }
419 
420 PetscErrorCode MatSetUp_KAIJ(Mat A)
421 {
422   PetscErrorCode ierr;
423   PetscInt       n;
424   PetscMPIInt    size;
425   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
426 
427   PetscFunctionBegin;
428   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
429   if (size == 1) {
430     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
431     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
432     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
433     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
434     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
435   } else {
436     Mat_MPIKAIJ *a;
437     Mat_MPIAIJ  *mpiaij;
438     IS          from,to;
439     Vec         gvec;
440     PetscScalar *T;
441     PetscInt    i,j;
442 
443     a = (Mat_MPIKAIJ*)A->data;
444     mpiaij = (Mat_MPIAIJ*)a->A->data;
445     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
446     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
447     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
448     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
449     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
450 
451     if (a->isTI) {
452       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
453        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
454        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
455        * to pass in. */
456       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
457       for (i=0; i<a->p; i++) {
458         for (j=0; j<a->q; j++) {
459           if (i==j) T[i+j*a->p] = 1.0;
460           else      T[i+j*a->p] = 0.0;
461         }
462       }
463     } else T = a->T;
464     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
465     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
466     if (a->isTI) {
467       ierr = PetscFree(T);CHKERRQ(ierr);
468     }
469 
470     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
471     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
472     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
473     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
474     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
475 
476     /* create two temporary Index sets for build scatter gather */
477     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
478     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
479 
480     /* create temporary global vector to generate scatter context */
481     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
482 
483     /* generate the scatter context */
484     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
485 
486     ierr = ISDestroy(&from);CHKERRQ(ierr);
487     ierr = ISDestroy(&to);CHKERRQ(ierr);
488     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
489   }
490 
491   A->assembled = PETSC_TRUE;
492   PetscFunctionReturn(0);
493 }
494 
495 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
496 {
497   PetscErrorCode ierr;
498   Mat            B;
499 
500   PetscFunctionBegin;
501   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
502   ierr = MatView(B,viewer);CHKERRQ(ierr);
503   ierr = MatDestroy(&B);CHKERRQ(ierr);
504   PetscFunctionReturn(0);
505 }
506 
507 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
508 {
509   PetscErrorCode ierr;
510   Mat            B;
511 
512   PetscFunctionBegin;
513   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
514   ierr = MatView(B,viewer);CHKERRQ(ierr);
515   ierr = MatDestroy(&B);CHKERRQ(ierr);
516   PetscFunctionReturn(0);
517 }
518 
519 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
520 {
521   PetscErrorCode ierr;
522   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
523 
524   PetscFunctionBegin;
525   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
526   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
527   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
528   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
529   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
530   ierr = PetscFree(b->S);CHKERRQ(ierr);
531   ierr = PetscFree(b->T);CHKERRQ(ierr);
532   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
533   ierr = PetscFree(A->data);CHKERRQ(ierr);
534   PetscFunctionReturn(0);
535 }
536 
537 /* --------------------------------------------------------------------------------------*/
538 
539 /* zz = yy + Axx */
540 PetscErrorCode MatMultAdd_SeqKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
541 {
542   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
543   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
544   const PetscScalar *s = b->S, *t = b->T;
545   const PetscScalar *x,*v,*bx;
546   PetscScalar       *y,*sums;
547   PetscErrorCode    ierr;
548   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
549   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
550 
551   PetscFunctionBegin;
552   if (!yy) {
553     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
554   } else {
555     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
556   }
557   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
558 
559   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
560   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
561   idx  = a->j;
562   v    = a->a;
563   ii   = a->i;
564 
565   if (b->isTI) {
566     for (i=0; i<m; i++) {
567       jrow = ii[i];
568       n    = ii[i+1] - jrow;
569       sums = y + p*i;
570       for (j=0; j<n; j++) {
571         for (k=0; k<p; k++) {
572           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
573         }
574       }
575     }
576   } else if (t) {
577     for (i=0; i<m; i++) {
578       jrow = ii[i];
579       n    = ii[i+1] - jrow;
580       sums = y + p*i;
581       bx   = x + q*i;
582       for (j=0; j<n; j++) {
583         for (k=0; k<p; k++) {
584           for (l=0; l<q; l++) {
585             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
586           }
587         }
588       }
589     }
590   }
591   if (s) {
592     for (i=0; i<m; i++) {
593       sums = y + p*i;
594       bx   = x + q*i;
595       if (i < b->AIJ->cmap->n) {
596         for (j=0; j<q; j++) {
597           for (k=0; k<p; k++) {
598             sums[k] += s[k+j*p]*bx[j];
599           }
600         }
601       }
602     }
603   }
604 
605   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
606   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
607   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
608   PetscFunctionReturn(0);
609 }
610 
611 PetscErrorCode MatMult_SeqKAIJ(Mat A,Vec xx,Vec yy)
612 {
613   PetscErrorCode ierr;
614   PetscFunctionBegin;
615   ierr = MatMultAdd_SeqKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
616   PetscFunctionReturn(0);
617 }
618 
619 #include <petsc/private/kernels/blockinvert.h>
620 
621 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ(Mat A,const PetscScalar **values)
622 {
623   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
624   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
625   const PetscScalar *S  = b->S;
626   const PetscScalar *T  = b->T;
627   const PetscScalar *v  = a->a;
628   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
629   PetscErrorCode    ierr;
630   PetscInt          i,j,*v_pivots,dof,dof2;
631   PetscScalar       *diag,aval,*v_work;
632 
633   PetscFunctionBegin;
634   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
635   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
636 
637   dof  = p;
638   dof2 = dof*dof;
639 
640   if (b->ibdiagvalid) {
641     if (values) *values = b->ibdiag;
642     PetscFunctionReturn(0);
643   }
644   if (!b->ibdiag) {
645     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
646     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
647   }
648   if (values) *values = b->ibdiag;
649   diag = b->ibdiag;
650 
651   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
652   for (i=0; i<m; i++) {
653     if (S) {
654       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
655     } else {
656       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
657     }
658     if (b->isTI) {
659       aval = 0;
660       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
661       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
662     } else if (T) {
663       aval = 0;
664       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
665       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
666     }
667     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
668     diag += dof2;
669   }
670   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
671 
672   b->ibdiagvalid = PETSC_TRUE;
673   PetscFunctionReturn(0);
674 }
675 
676 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
677 {
678   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
679 
680   PetscFunctionBegin;
681   *B = kaij->AIJ;
682   PetscFunctionReturn(0);
683 }
684 
685 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
686 {
687   PetscErrorCode    ierr;
688   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
689   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
690   const PetscScalar *aa = a->a, *T = kaij->T, *v;
691   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
692   const PetscScalar *b, *xb, *idiag;
693   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
694   PetscInt          i, j, k, i2, bs, bs2, nz;
695 
696   PetscFunctionBegin;
697   its = its*lits;
698   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
699   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
700   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for diagonal shift");
701   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for applying upper or lower triangular parts");
702   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: No support for non-square dense blocks");
703   else        {bs = p; bs2 = bs*bs; }
704 
705   if (!m) PetscFunctionReturn(0);
706 
707   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ(A,NULL);CHKERRQ(ierr); }
708   idiag = kaij->ibdiag;
709   diag  = a->diag;
710 
711   if (!kaij->sor.setup) {
712     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
713     kaij->sor.setup = PETSC_TRUE;
714   }
715   y     = kaij->sor.y;
716   w     = kaij->sor.w;
717   work  = kaij->sor.work;
718   t     = kaij->sor.t;
719   arr   = kaij->sor.arr;
720 
721   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
722   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
723 
724   if (flag & SOR_ZERO_INITIAL_GUESS) {
725     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
726       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
727       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
728       i2     =  bs;
729       idiag  += bs2;
730       for (i=1; i<m; i++) {
731         v  = aa + ai[i];
732         vi = aj + ai[i];
733         nz = diag[i] - ai[i];
734 
735         if (T) {                /* b - T (Arow * x) */
736           ierr = PetscMemzero(w,bs*sizeof(PetscScalar));
737           for (j=0; j<nz; j++) {
738             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
739           }
740           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
741           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
742         } else if (kaij->isTI) {
743           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
744           for (j=0; j<nz; j++) {
745             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
746           }
747         } else {
748           ierr = PetscMemcpy(t+i2,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
749         }
750 
751         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
752         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
753 
754         idiag += bs2;
755         i2    += bs;
756       }
757       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
758       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
759       xb = t;
760     } else xb = b;
761     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
762       idiag = kaij->ibdiag+bs2*(m-1);
763       i2    = bs * (m-1);
764       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
765       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
766       i2    -= bs;
767       idiag -= bs2;
768       for (i=m-2; i>=0; i--) {
769         v  = aa + diag[i] + 1 ;
770         vi = aj + diag[i] + 1;
771         nz = ai[i+1] - diag[i] - 1;
772 
773         if (T) {                /* FIXME: This branch untested */
774           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
775           /* copy all rows of x that are needed into contiguous space */
776           workt = work;
777           for (j=0; j<nz; j++) {
778             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
779             workt += bs;
780           }
781           arrt = arr;
782           for (j=0; j<nz; j++) {
783             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
784             for (k=0; k<bs2; k++) arrt[k] *= v[j];
785             arrt += bs2;
786           }
787           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
788         } else if (kaij->isTI) {
789           ierr = PetscMemcpy(w,t+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
790           for (j=0; j<nz; j++) {
791             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
792           }
793         }
794 
795         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
796         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
797 
798         idiag -= bs2;
799         i2    -= bs;
800       }
801       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
802     }
803     its--;
804   }
805   while (its--) {               /* FIXME: This branch not updated */
806     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
807       i2     =  0;
808       idiag  = kaij->ibdiag;
809       for (i=0; i<m; i++) {
810         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
811 
812         v  = aa + ai[i];
813         vi = aj + ai[i];
814         nz = diag[i] - ai[i];
815         workt = work;
816         for (j=0; j<nz; j++) {
817           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
818           workt += bs;
819         }
820         arrt = arr;
821         if (T) {
822           for (j=0; j<nz; j++) {
823             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
824             for (k=0; k<bs2; k++) arrt[k] *= v[j];
825             arrt += bs2;
826           }
827           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
828         } else if (kaij->isTI) {
829           for (j=0; j<nz; j++) {
830             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
831             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
832             arrt += bs2;
833           }
834           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
835         }
836         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
837 
838         v  = aa + diag[i] + 1;
839         vi = aj + diag[i] + 1;
840         nz = ai[i+1] - diag[i] - 1;
841         workt = work;
842         for (j=0; j<nz; j++) {
843           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
844           workt += bs;
845         }
846         arrt = arr;
847         if (T) {
848           for (j=0; j<nz; j++) {
849             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
850             for (k=0; k<bs2; k++) arrt[k] *= v[j];
851             arrt += bs2;
852           }
853           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
854         } else if (kaij->isTI) {
855           for (j=0; j<nz; j++) {
856             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
857             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
858             arrt += bs2;
859           }
860           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
861         }
862 
863         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
864         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
865 
866         idiag += bs2;
867         i2    += bs;
868       }
869       xb = t;
870     }
871     else xb = b;
872     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
873       idiag = kaij->ibdiag+bs2*(m-1);
874       i2    = bs * (m-1);
875       if (xb == b) {
876         for (i=m-1; i>=0; i--) {
877           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
878 
879           v  = aa + ai[i];
880           vi = aj + ai[i];
881           nz = diag[i] - ai[i];
882           workt = work;
883           for (j=0; j<nz; j++) {
884             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
885             workt += bs;
886           }
887           arrt = arr;
888           if (T) {
889             for (j=0; j<nz; j++) {
890               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
891               for (k=0; k<bs2; k++) arrt[k] *= v[j];
892               arrt += bs2;
893             }
894             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
895           } else if (kaij->isTI) {
896             for (j=0; j<nz; j++) {
897               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
898               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
899               arrt += bs2;
900             }
901             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
902           }
903 
904           v  = aa + diag[i] + 1;
905           vi = aj + diag[i] + 1;
906           nz = ai[i+1] - diag[i] - 1;
907           workt = work;
908           for (j=0; j<nz; j++) {
909             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
910             workt += bs;
911           }
912           arrt = arr;
913           if (T) {
914             for (j=0; j<nz; j++) {
915               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
916               for (k=0; k<bs2; k++) arrt[k] *= v[j];
917               arrt += bs2;
918             }
919             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
920           } else if (kaij->isTI) {
921             for (j=0; j<nz; j++) {
922               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
923               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
924               arrt += bs2;
925             }
926             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
927           }
928 
929           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
930           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
931         }
932       } else {
933         for (i=m-1; i>=0; i--) {
934           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
935           v  = aa + diag[i] + 1;
936           vi = aj + diag[i] + 1;
937           nz = ai[i+1] - diag[i] - 1;
938           workt = work;
939           for (j=0; j<nz; j++) {
940             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
941             workt += bs;
942           }
943           arrt = arr;
944           if (T) {
945             for (j=0; j<nz; j++) {
946               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
947               for (k=0; k<bs2; k++) arrt[k] *= v[j];
948               arrt += bs2;
949             }
950             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
951           } else if (kaij->isTI) {
952             for (j=0; j<nz; j++) {
953               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
954               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
955               arrt += bs2;
956             }
957             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
958           }
959           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
960           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
961         }
962         idiag -= bs2;
963         i2    -= bs;
964       }
965       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
966     }
967   }
968 
969   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
970   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
971   PetscFunctionReturn(0);
972 }
973 
974 /*===================================================================================*/
975 
976 PetscErrorCode MatMultAdd_MPIKAIJ(Mat A,Vec xx,Vec yy,Vec zz)
977 {
978   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
979   PetscErrorCode ierr;
980 
981   PetscFunctionBegin;
982   if (!yy) {
983     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
984   } else {
985     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
986   }
987   /* start the scatter */
988   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
989   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
990   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
991   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
992   PetscFunctionReturn(0);
993 }
994 
995 PetscErrorCode MatMult_MPIKAIJ(Mat A,Vec xx,Vec yy)
996 {
997   PetscErrorCode ierr;
998   PetscFunctionBegin;
999   ierr = MatMultAdd_MPIKAIJ(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
1000   PetscFunctionReturn(0);
1001 }
1002 
1003 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ(Mat A,const PetscScalar **values)
1004 {
1005   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
1006   PetscErrorCode  ierr;
1007 
1008   PetscFunctionBegin;
1009   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 /* ----------------------------------------------------------------*/
1014 
1015 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1016 {
1017   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
1018   PetscErrorCode  diag = PETSC_FALSE;
1019   PetscErrorCode  ierr;
1020   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
1021   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
1022 
1023   PetscFunctionBegin;
1024   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1025   b->getrowactive = PETSC_TRUE;
1026   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
1027 
1028   if ((!S) && (!T) && (!b->isTI)) {
1029     if (ncols)    *ncols  = 0;
1030     if (cols)     *cols   = NULL;
1031     if (values)   *values = NULL;
1032     PetscFunctionReturn(0);
1033   }
1034 
1035   if (T || b->isTI) {
1036     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
1037     c     = nzaij;
1038     for (i=0; i<nzaij; i++) {
1039       /* check if this row contains a diagonal entry */
1040       if (colsaij[i] == r) {
1041         diag = PETSC_TRUE;
1042         c = i;
1043       }
1044     }
1045   } else nzaij = c = 0;
1046 
1047   /* calculate size of row */
1048   nz = 0;
1049   if (S)            nz += q;
1050   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
1051 
1052   if (cols || values) {
1053     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1054     for (i=0; i<q; i++) {
1055       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1056       v[i] = 0.0;
1057     }
1058     if (b->isTI) {
1059       for (i=0; i<nzaij; i++) {
1060         for (j=0; j<q; j++) {
1061           idx[i*q+j] = colsaij[i]*q+j;
1062           v[i*q+j]   = (j==s ? vaij[i] : 0);
1063         }
1064       }
1065     } else if (T) {
1066       for (i=0; i<nzaij; i++) {
1067         for (j=0; j<q; j++) {
1068           idx[i*q+j] = colsaij[i]*q+j;
1069           v[i*q+j]   = vaij[i]*T[s+j*p];
1070         }
1071       }
1072     }
1073     if (S) {
1074       for (j=0; j<q; j++) {
1075         idx[c*q+j] = r*q+j;
1076         v[c*q+j]  += S[s+j*p];
1077       }
1078     }
1079   }
1080 
1081   if (ncols)    *ncols  = nz;
1082   if (cols)     *cols   = idx;
1083   if (values)   *values = v;
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1088 {
1089   PetscErrorCode ierr;
1090   PetscFunctionBegin;
1091   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1092   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
1097 {
1098   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
1099   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
1100   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
1101   Mat             AIJ     = b->A;
1102   PetscBool       diag    = PETSC_FALSE;
1103   PetscErrorCode  ierr;
1104   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
1105   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
1106   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
1107 
1108   PetscFunctionBegin;
1109   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1110   b->getrowactive = PETSC_TRUE;
1111   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
1112   lrow = row - rstart;
1113 
1114   if ((!S) && (!T) && (!b->isTI)) {
1115     if (ncols)    *ncols  = 0;
1116     if (cols)     *cols   = NULL;
1117     if (values)   *values = NULL;
1118     PetscFunctionReturn(0);
1119   }
1120 
1121   r = lrow/p;
1122   s = lrow%p;
1123 
1124   if (T || b->isTI) {
1125     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
1126     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
1127     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
1128 
1129     c     = ncolsaij + ncolsoaij;
1130     for (i=0; i<ncolsaij; i++) {
1131       /* check if this row contains a diagonal entry */
1132       if (colsaij[i] == r) {
1133         diag = PETSC_TRUE;
1134         c = i;
1135       }
1136     }
1137   } else c = 0;
1138 
1139   /* calculate size of row */
1140   nz = 0;
1141   if (S)            nz += q;
1142   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1143 
1144   if (cols || values) {
1145     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1146     for (i=0; i<q; i++) {
1147       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1148       v[i] = 0.0;
1149     }
1150     if (b->isTI) {
1151       for (i=0; i<ncolsaij; i++) {
1152         for (j=0; j<q; j++) {
1153           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1154           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1155         }
1156       }
1157       for (i=0; i<ncolsoaij; i++) {
1158         for (j=0; j<q; j++) {
1159           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1160           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1161         }
1162       }
1163     } else if (T) {
1164       for (i=0; i<ncolsaij; i++) {
1165         for (j=0; j<q; j++) {
1166           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1167           v[i*q+j]   = vals[i]*T[s+j*p];
1168         }
1169       }
1170       for (i=0; i<ncolsoaij; i++) {
1171         for (j=0; j<q; j++) {
1172           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1173           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1174         }
1175       }
1176     }
1177     if (S) {
1178       for (j=0; j<q; j++) {
1179         idx[c*q+j] = (r+rstart/p)*q+j;
1180         v[c*q+j]  += S[s+j*p];
1181       }
1182     }
1183   }
1184 
1185   if (ncols)  *ncols  = nz;
1186   if (cols)   *cols   = idx;
1187   if (values) *values = v;
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1192 {
1193   PetscErrorCode ierr;
1194   PetscFunctionBegin;
1195   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1196   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1197   PetscFunctionReturn(0);
1198 }
1199 
1200 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1201 {
1202   PetscErrorCode ierr;
1203   Mat            A;
1204 
1205   PetscFunctionBegin;
1206   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1207   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1208   ierr = MatDestroy(&A);CHKERRQ(ierr);
1209   PetscFunctionReturn(0);
1210 }
1211 
1212 /* ---------------------------------------------------------------------------------- */
1213 /*@C
1214   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1215 
1216     [I \otimes S + A \otimes T]
1217 
1218   where
1219     S is a dense (p \times q) matrix
1220     T is a dense (p \times q) matrix
1221     A is an AIJ  (n \times n) matrix
1222     I is the identity matrix
1223   The resulting matrix is (np \times nq)
1224 
1225   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1226 
1227   Collective
1228 
1229   Input Parameters:
1230 + A - the AIJ matrix
1231 . p - number of rows in S and T
1232 . q - number of columns in S and T
1233 . S - the S matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1234 - T - the T matrix (can be PETSC_NULL), stored as a PetscScalar array (column-major)
1235 
1236   Output Parameter:
1237 . kaij - the new KAIJ matrix
1238 
1239   Notes:
1240   This function increases the reference count on the AIJ matrix, so the user is free to destroy the matrix if it is not needed.
1241   Changes to the entries of the AIJ matrix will immediately affect the KAIJ matrix.
1242 
1243   Level: advanced
1244 
1245 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1246 @*/
1247 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1248 {
1249   PetscErrorCode ierr;
1250   PetscMPIInt    size;
1251 
1252   PetscFunctionBegin;
1253   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1254   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1255   if (size == 1) {
1256     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1257   } else {
1258     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1259   }
1260   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1261   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1262   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1263   ierr = MatSetUp(*kaij);
1264   PetscFunctionReturn(0);
1265 }
1266 
1267 /*MC
1268   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1269 
1270     [I \otimes S + A \otimes T]
1271 
1272   where
1273     S is a dense (p \times q) matrix
1274     T is a dense (p \times q) matrix
1275     A is an AIJ  (n \times n) matrix
1276     I is the identity matrix
1277   The resulting matrix is (np \times nq)
1278 
1279   S and T are always stored independently on all processes as PetscScalar arrays in column-major format.
1280 
1281   Level: advanced
1282 
1283 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1284 M*/
1285 
1286 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1287 {
1288   PetscErrorCode ierr;
1289   Mat_MPIKAIJ    *b;
1290   PetscMPIInt    size;
1291 
1292   PetscFunctionBegin;
1293   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1294   A->data  = (void*)b;
1295 
1296   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1297 
1298   A->ops->setup = MatSetUp_KAIJ;
1299 
1300   b->w    = 0;
1301   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1302   if (size == 1) {
1303     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1304     A->ops->setup               = MatSetUp_KAIJ;
1305     A->ops->destroy             = MatDestroy_SeqKAIJ;
1306     A->ops->view                = MatView_SeqKAIJ;
1307     A->ops->mult                = MatMult_SeqKAIJ;
1308     A->ops->multadd             = MatMultAdd_SeqKAIJ;
1309     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ;
1310     A->ops->getrow              = MatGetRow_SeqKAIJ;
1311     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1312     A->ops->sor                 = MatSOR_SeqKAIJ;
1313   } else {
1314     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1315     A->ops->setup               = MatSetUp_KAIJ;
1316     A->ops->destroy             = MatDestroy_MPIKAIJ;
1317     A->ops->view                = MatView_MPIKAIJ;
1318     A->ops->mult                = MatMult_MPIKAIJ;
1319     A->ops->multadd             = MatMultAdd_MPIKAIJ;
1320     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ;
1321     A->ops->getrow              = MatGetRow_MPIKAIJ;
1322     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1323     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1324   }
1325   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1326   PetscFunctionReturn(0);
1327 }
1328