xref: /petsc/src/mat/impls/kaij/kaij.c (revision b04351cbedba3de176a543a26e11c6ba03e9a955)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameter:
76 .  S - the S matrix, in form of a scalar array in column-major format
77 
78    Notes:
79    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
80    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S.
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   *S = b->S;
91   PetscFunctionReturn(0);
92 }
93 
94 /*@C
95    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
96 
97    Not Collective; the entire T is stored and returned independently on all processes
98 
99    Input Parameter:
100 .  A - the KAIJ matrix
101 
102    Output Parameter:
103 .  T - the T matrix, in form of a scalar array in column-major format
104 
105    Notes:
106    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
107    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T.
108 
109    Level: advanced
110 
111 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
112 @*/
113 PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
114 {
115   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
116   PetscFunctionBegin;
117   *T = b->T;
118   PetscFunctionReturn(0);
119 }
120 
121 /*@
122    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
123 
124    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
125 
126    Input Parameters:
127 +  A - the KAIJ matrix
128 -  B - the AIJ matrix
129 
130    Level: advanced
131 
132 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
133 @*/
134 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
135 {
136   PetscErrorCode ierr;
137   PetscMPIInt    size;
138 
139   PetscFunctionBegin;
140   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
141   if (size == 1) {
142     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
143     a->AIJ = B;
144   } else {
145     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
146     a->A = B;
147   }
148   PetscFunctionReturn(0);
149 }
150 
151 /*@C
152    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
153 
154    Logically Collective; the entire S is stored independently on all processes.
155 
156    Input Parameters:
157 +  A - the KAIJ matrix
158 .  p - the number of rows in S
159 .  q - the number of columns in S
160 -  S - the S matrix, in form of a scalar array in column-major format
161 
162    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
163 
164    Level: Advanced
165 
166 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
167 @*/
168 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
169 {
170   PetscErrorCode ierr;
171   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
172 
173   PetscFunctionBegin;
174   if (a->S) {
175     ierr = PetscFree(a->S);CHKERRQ(ierr);
176   }
177   if (S) {
178     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
179     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
180   } else  a->S = NULL;
181 
182   a->p = p;
183   a->q = q;
184 
185   PetscFunctionReturn(0);
186 }
187 
188 /*@C
189    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
190 
191    Logically Collective; the entire T is stored independently on all processes.
192 
193    Input Parameters:
194 +  A - the KAIJ matrix
195 .  p - the number of rows in S
196 .  q - the number of columns in S
197 -  T - the T matrix, in form of a scalar array in column-major format
198 
199    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
200 
201    Level: Advanced
202 
203 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
204 @*/
205 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
206 {
207   PetscErrorCode ierr;
208   PetscInt       i,j;
209   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
210   PetscBool      isTI = PETSC_FALSE;
211 
212   PetscFunctionBegin;
213   /* check if T is an identity matrix */
214   if (T && (p == q)) {
215     isTI = PETSC_TRUE;
216     for (i=0; i<p; i++) {
217       for (j=0; j<q; j++) {
218         if (i == j) {
219           /* diagonal term must be 1 */
220           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
221         } else {
222           /* off-diagonal term must be 0 */
223           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
224         }
225       }
226     }
227   }
228   a->isTI = isTI;
229 
230   if (a->T) {
231     ierr = PetscFree(a->T);CHKERRQ(ierr);
232   }
233   if (T && (!isTI)) {
234     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
235     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
236   } else a->T = NULL;
237 
238   a->p = p;
239   a->q = q;
240   PetscFunctionReturn(0);
241 }
242 
243 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
244 {
245   PetscErrorCode ierr;
246   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
247 
248   PetscFunctionBegin;
249   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
250   ierr = PetscFree(b->S);CHKERRQ(ierr);
251   ierr = PetscFree(b->T);CHKERRQ(ierr);
252   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
253   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
254   ierr = PetscFree(A->data);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 PetscErrorCode MatSetUp_KAIJ(Mat A)
259 {
260   PetscErrorCode ierr;
261   PetscInt       n;
262   PetscMPIInt    size;
263   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
264 
265   PetscFunctionBegin;
266   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
267   if (size == 1) {
268     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
269     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
270     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
271     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
272     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
273     ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr);
274   } else {
275     Mat_MPIKAIJ *a;
276     Mat_MPIAIJ  *mpiaij;
277     IS          from,to;
278     Vec         gvec;
279     PetscScalar *T;
280     PetscInt    i,j;
281 
282     a = (Mat_MPIKAIJ*)A->data;
283     mpiaij = (Mat_MPIAIJ*)a->A->data;
284     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
285     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
286     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
287     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
288     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
289     ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr);
290 
291     if (a->isTI) {
292       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
293        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
294        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
295        * to pass in. */
296       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
297       for (i=0; i<a->p; i++) {
298         for (j=0; j<a->q; j++) {
299           if (i==j) T[i+j*a->p] = 1.0;
300           else      T[i+j*a->p] = 0.0;
301         }
302       }
303     } else T = a->T;
304     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
305     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
306     if (a->isTI) {
307       ierr = PetscFree(T);CHKERRQ(ierr);
308     }
309 
310     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
311     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
312     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
313     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
314     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
315 
316     /* create two temporary Index sets for build scatter gather */
317     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
318     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
319 
320     /* create temporary global vector to generate scatter context */
321     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
322 
323     /* generate the scatter context */
324     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
325 
326     ierr = ISDestroy(&from);CHKERRQ(ierr);
327     ierr = ISDestroy(&to);CHKERRQ(ierr);
328     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
329   }
330 
331   A->assembled = PETSC_TRUE;
332   PetscFunctionReturn(0);
333 }
334 
335 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
336 {
337   PetscErrorCode ierr;
338   Mat            B;
339 
340   PetscFunctionBegin;
341   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
342   ierr = MatView(B,viewer);CHKERRQ(ierr);
343   ierr = MatDestroy(&B);CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
348 {
349   PetscErrorCode ierr;
350   Mat            B;
351 
352   PetscFunctionBegin;
353   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
354   ierr = MatView(B,viewer);CHKERRQ(ierr);
355   ierr = MatDestroy(&B);CHKERRQ(ierr);
356   PetscFunctionReturn(0);
357 }
358 
359 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
360 {
361   PetscErrorCode ierr;
362   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
363 
364   PetscFunctionBegin;
365   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
366   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
367   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
368   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
369   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
370   ierr = PetscFree(b->S);CHKERRQ(ierr);
371   ierr = PetscFree(b->T);CHKERRQ(ierr);
372   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
373   ierr = PetscFree(A->data);CHKERRQ(ierr);
374   PetscFunctionReturn(0);
375 }
376 
377 
378 /* --------------------------------------------------------------------------------------*/
379 
380 /* zz = yy + Axx */
381 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
382 {
383   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
384   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
385   const PetscScalar *s = b->S, *t = b->T;
386   const PetscScalar *x,*v,*bx;
387   PetscScalar       *y,*sums;
388   PetscErrorCode    ierr;
389   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
390   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
391 
392   PetscFunctionBegin;
393   if (!yy) {
394     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
395   } else {
396     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
397   }
398   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
399 
400   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
401   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
402   idx  = a->j;
403   v    = a->a;
404   ii   = a->i;
405 
406   if (b->isTI) {
407     for (i=0; i<m; i++) {
408       jrow = ii[i];
409       n    = ii[i+1] - jrow;
410       sums = y + p*i;
411       for (j=0; j<n; j++) {
412         for (k=0; k<p; k++) {
413           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
414         }
415       }
416     }
417   } else if (t) {
418     for (i=0; i<m; i++) {
419       jrow = ii[i];
420       n    = ii[i+1] - jrow;
421       sums = y + p*i;
422       bx   = x + q*i;
423       for (j=0; j<n; j++) {
424         for (k=0; k<p; k++) {
425           for (l=0; l<q; l++) {
426             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
427           }
428         }
429       }
430     }
431   }
432   if (s) {
433     for (i=0; i<m; i++) {
434       jrow = ii[i];
435       n    = ii[i+1] - jrow;
436       sums = y + p*i;
437       bx   = x + q*i;
438       if (i < b->AIJ->cmap->n) {
439         for (j=0; j<q; j++) {
440           for (k=0; k<p; k++) {
441             sums[k] += s[k+j*p]*bx[j];
442           }
443         }
444       }
445     }
446   }
447 
448   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
449   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
450   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
455 {
456   PetscErrorCode ierr;
457   PetscFunctionBegin;
458   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 
462 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
463 {
464   PetscErrorCode ierr;
465   PetscFunctionBegin;
466   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
467   PetscFunctionReturn(0);
468 }
469 
470 #include <petsc/private/kernels/blockinvert.h>
471 
472 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
473 {
474   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
475   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
476   const PetscScalar *S  = b->S;
477   const PetscScalar *T  = b->T;
478   const PetscScalar *v  = a->a;
479   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
480   PetscErrorCode    ierr;
481   PetscInt          i,j,*v_pivots,dof,dof2;
482   PetscScalar       *diag,aval,*v_work;
483 
484   PetscFunctionBegin;
485   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
486   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
487 
488   dof  = p;
489   dof2 = dof*dof;
490 
491   if (b->ibdiagvalid) {
492     if (values) *values = b->ibdiag;
493     PetscFunctionReturn(0);
494   }
495   if (!b->ibdiag) {
496     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
497     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
498   }
499   if (values) *values = b->ibdiag;
500   diag = b->ibdiag;
501 
502   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
503   for (i=0; i<m; i++) {
504     if (S) {
505       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
506     } else {
507       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
508     }
509     if (b->isTI) {
510       aval = 0;
511       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
512       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
513     } else if (T) {
514       aval = 0;
515       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
516       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
517     }
518     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
519     diag += dof2;
520   }
521   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
522 
523   b->ibdiagvalid = PETSC_TRUE;
524   PetscFunctionReturn(0);
525 }
526 
527 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
528 {
529   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
530 
531   PetscFunctionBegin;
532   *B = kaij->AIJ;
533   PetscFunctionReturn(0);
534 }
535 
536 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
537 {
538   PetscErrorCode    ierr;
539   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
540   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
541   const PetscScalar *aa = a->a, *T = kaij->T, *v;
542   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
543   const PetscScalar *b, *xb, *idiag;
544   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
545   PetscInt          i, j, k, i2, bs, bs2, nz;
546 
547   PetscFunctionBegin;
548   its = its*lits;
549   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
550   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
551   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
552   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
553   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
554   else        {bs = p; bs2 = bs*bs; }
555 
556   if (!m) PetscFunctionReturn(0);
557 
558   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
559   idiag = kaij->ibdiag;
560   diag  = a->diag;
561 
562   if (!kaij->sor.setup) {
563     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
564     kaij->sor.setup = PETSC_TRUE;
565   }
566   y     = kaij->sor.y;
567   w     = kaij->sor.w;
568   work  = kaij->sor.work;
569   t     = kaij->sor.t;
570   arr   = kaij->sor.arr;
571 
572   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
573   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
574 
575   if (flag & SOR_ZERO_INITIAL_GUESS) {
576     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
577       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
578       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
579       i2     =  bs;
580       idiag  += bs2;
581       for (i=1; i<m; i++) {
582         v  = aa + ai[i];
583         vi = aj + ai[i];
584         nz = diag[i] - ai[i];
585 
586         if (T) {                /* b - T (Arow * x) */
587           for (k=0; k<bs; k++) w[k] = 0;
588           for (j=0; j<nz; j++) {
589             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
590           }
591           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
592           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
593         } else if (kaij->isTI) {
594           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
595           for (j=0; j<nz; j++) {
596             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
597           }
598         } else {
599           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
600         }
601 
602         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
603         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
604 
605         idiag += bs2;
606         i2    += bs;
607       }
608       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
609       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
610       xb = t;
611     } else xb = b;
612     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
613       idiag = kaij->ibdiag+bs2*(m-1);
614       i2    = bs * (m-1);
615       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
616       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
617       i2    -= bs;
618       idiag -= bs2;
619       for (i=m-2; i>=0; i--) {
620         v  = aa + diag[i] + 1 ;
621         vi = aj + diag[i] + 1;
622         nz = ai[i+1] - diag[i] - 1;
623 
624         if (T) {                /* FIXME: This branch untested */
625           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
626           /* copy all rows of x that are needed into contiguous space */
627           workt = work;
628           for (j=0; j<nz; j++) {
629             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
630             workt += bs;
631           }
632           arrt = arr;
633           for (j=0; j<nz; j++) {
634             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
635             for (k=0; k<bs2; k++) arrt[k] *= v[j];
636             arrt += bs2;
637           }
638           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
639         } else if (kaij->isTI) {
640           for (k=0; k<bs; k++) w[k] = t[i2+k];
641           for (j=0; j<nz; j++) {
642             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
643           }
644         }
645 
646         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
647         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
648 
649         idiag -= bs2;
650         i2    -= bs;
651       }
652       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
653     }
654     its--;
655   }
656   while (its--) {               /* FIXME: This branch not updated */
657     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
658       i2     =  0;
659       idiag  = kaij->ibdiag;
660       for (i=0; i<m; i++) {
661         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
662 
663         v  = aa + ai[i];
664         vi = aj + ai[i];
665         nz = diag[i] - ai[i];
666         workt = work;
667         for (j=0; j<nz; j++) {
668           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
669           workt += bs;
670         }
671         arrt = arr;
672         if (T) {
673           for (j=0; j<nz; j++) {
674             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
675             for (k=0; k<bs2; k++) arrt[k] *= v[j];
676             arrt += bs2;
677           }
678           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
679         } else if (kaij->isTI) {
680           for (j=0; j<nz; j++) {
681             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
682             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
683             arrt += bs2;
684           }
685           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
686         }
687         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
688 
689         v  = aa + diag[i] + 1;
690         vi = aj + diag[i] + 1;
691         nz = ai[i+1] - diag[i] - 1;
692         workt = work;
693         for (j=0; j<nz; j++) {
694           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
695           workt += bs;
696         }
697         arrt = arr;
698         if (T) {
699           for (j=0; j<nz; j++) {
700             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
701             for (k=0; k<bs2; k++) arrt[k] *= v[j];
702             arrt += bs2;
703           }
704           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
705         } else if (kaij->isTI) {
706           for (j=0; j<nz; j++) {
707             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
708             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
709             arrt += bs2;
710           }
711           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
712         }
713 
714         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
715         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
716 
717         idiag += bs2;
718         i2    += bs;
719       }
720       xb = t;
721     }
722     else xb = b;
723     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
724       idiag = kaij->ibdiag+bs2*(m-1);
725       i2    = bs * (m-1);
726       if (xb == b) {
727         for (i=m-1; i>=0; i--) {
728           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
729 
730           v  = aa + ai[i];
731           vi = aj + ai[i];
732           nz = diag[i] - ai[i];
733           workt = work;
734           for (j=0; j<nz; j++) {
735             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
736             workt += bs;
737           }
738           arrt = arr;
739           if (T) {
740             for (j=0; j<nz; j++) {
741               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
742               for (k=0; k<bs2; k++) arrt[k] *= v[j];
743               arrt += bs2;
744             }
745             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
746           } else if (kaij->isTI) {
747             for (j=0; j<nz; j++) {
748               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
749               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
750               arrt += bs2;
751             }
752             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
753           }
754 
755           v  = aa + diag[i] + 1;
756           vi = aj + diag[i] + 1;
757           nz = ai[i+1] - diag[i] - 1;
758           workt = work;
759           for (j=0; j<nz; j++) {
760             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
761             workt += bs;
762           }
763           arrt = arr;
764           if (T) {
765             for (j=0; j<nz; j++) {
766               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
767               for (k=0; k<bs2; k++) arrt[k] *= v[j];
768               arrt += bs2;
769             }
770             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
771           } else if (kaij->isTI) {
772             for (j=0; j<nz; j++) {
773               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
774               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
775               arrt += bs2;
776             }
777             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
778           }
779 
780           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
781           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
782         }
783       } else {
784         for (i=m-1; i>=0; i--) {
785           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
786           v  = aa + diag[i] + 1;
787           vi = aj + diag[i] + 1;
788           nz = ai[i+1] - diag[i] - 1;
789           workt = work;
790           for (j=0; j<nz; j++) {
791             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
792             workt += bs;
793           }
794           arrt = arr;
795           if (T) {
796             for (j=0; j<nz; j++) {
797               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
798               for (k=0; k<bs2; k++) arrt[k] *= v[j];
799               arrt += bs2;
800             }
801             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
802           } else if (kaij->isTI) {
803             for (j=0; j<nz; j++) {
804               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
805               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
806               arrt += bs2;
807             }
808             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
809           }
810           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
811           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
812         }
813         idiag -= bs2;
814         i2    -= bs;
815       }
816       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
817     }
818   }
819 
820   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
821   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
822   PetscFunctionReturn(0);
823 }
824 
825 /*===================================================================================*/
826 
827 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
828 {
829   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833   if (!yy) {
834     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
835   } else {
836     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
837   }
838   /* start the scatter */
839   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
840   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
841   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
843   PetscFunctionReturn(0);
844 }
845 
846 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
847 {
848   PetscErrorCode ierr;
849   PetscFunctionBegin;
850   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
851   PetscFunctionReturn(0);
852 }
853 
854 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
855 {
856   PetscErrorCode ierr;
857   PetscFunctionBegin;
858   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
863 {
864   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
865   PetscErrorCode  ierr;
866 
867   PetscFunctionBegin;
868   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 /* ----------------------------------------------------------------*/
873 
874 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
875 {
876   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
877   PetscErrorCode  diag = PETSC_FALSE;
878   PetscErrorCode  ierr;
879   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
880   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
881 
882   PetscFunctionBegin;
883   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
884   b->getrowactive = PETSC_TRUE;
885   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
886 
887   if ((!S) && (!T) && (!b->isTI)) {
888     if (ncols)    *ncols  = 0;
889     if (cols)     *cols   = NULL;
890     if (values)   *values = NULL;
891     PetscFunctionReturn(0);
892   }
893 
894   if (T || b->isTI) {
895     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
896     c     = nzaij;
897     for (i=0; i<nzaij; i++) {
898       /* check if this row contains a diagonal entry */
899       if (colsaij[i] == r) {
900         diag = PETSC_TRUE;
901         c = i;
902       }
903     }
904   } else nzaij = c = 0;
905 
906   /* calculate size of row */
907   nz = 0;
908   if (S)            nz += q;
909   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
910 
911   if (cols || values) {
912     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
913     for (i=0; i<q; i++) {
914       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
915       v[i] = 0.0;
916     }
917     if (b->isTI) {
918       for (i=0; i<nzaij; i++) {
919         for (j=0; j<q; j++) {
920           idx[i*q+j] = colsaij[i]*q+j;
921           v[i*q+j]   = (j==s ? vaij[i] : 0);
922         }
923       }
924     } else if (T) {
925       for (i=0; i<nzaij; i++) {
926         for (j=0; j<q; j++) {
927           idx[i*q+j] = colsaij[i]*q+j;
928           v[i*q+j]   = vaij[i]*T[s+j*p];
929         }
930       }
931     }
932     if (S) {
933       for (j=0; j<q; j++) {
934         idx[c*q+j] = r*q+j;
935         v[c*q+j]  += S[s+j*p];
936       }
937     }
938   }
939 
940   if (ncols)    *ncols  = nz;
941   if (cols)     *cols   = idx;
942   if (values)   *values = v;
943 
944   PetscFunctionReturn(0);
945 }
946 
947 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
948 {
949   PetscErrorCode ierr;
950   PetscFunctionBegin;
951   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
952   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
953   PetscFunctionReturn(0);
954 }
955 
956 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
957 {
958   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
959   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
960   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
961   Mat             AIJ     = b->A;
962   PetscBool       diag    = PETSC_FALSE;
963   PetscErrorCode  ierr;
964   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
965   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
966   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
967 
968   PetscFunctionBegin;
969   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
970   b->getrowactive = PETSC_TRUE;
971   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
972   lrow = row - rstart;
973 
974   if ((!S) && (!T) && (!b->isTI)) {
975     if (ncols)    *ncols  = 0;
976     if (cols)     *cols   = NULL;
977     if (values)   *values = NULL;
978     PetscFunctionReturn(0);
979   }
980 
981   r = lrow/p;
982   s = lrow%p;
983 
984   if (T || b->isTI) {
985     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
986     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
987     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
988 
989     c     = ncolsaij + ncolsoaij;
990     for (i=0; i<ncolsaij; i++) {
991       /* check if this row contains a diagonal entry */
992       if (colsaij[i] == r) {
993         diag = PETSC_TRUE;
994         c = i;
995       }
996     }
997   } else c = 0;
998 
999   /* calculate size of row */
1000   nz = 0;
1001   if (S)            nz += q;
1002   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1003 
1004   if (cols || values) {
1005     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1006     for (i=0; i<q; i++) {
1007       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1008       v[i] = 0.0;
1009     }
1010     if (b->isTI) {
1011       for (i=0; i<ncolsaij; i++) {
1012         for (j=0; j<q; j++) {
1013           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1014           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1015         }
1016       }
1017       for (i=0; i<ncolsoaij; i++) {
1018         for (j=0; j<q; j++) {
1019           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1020           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1021         }
1022       }
1023     } else if (T) {
1024       for (i=0; i<ncolsaij; i++) {
1025         for (j=0; j<q; j++) {
1026           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1027           v[i*q+j]   = vals[i]*T[s+j*p];
1028         }
1029       }
1030       for (i=0; i<ncolsoaij; i++) {
1031         for (j=0; j<q; j++) {
1032           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1033           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1034         }
1035       }
1036     }
1037     if (S) {
1038       for (j=0; j<q; j++) {
1039         idx[c*q+j] = (r+rstart/p)*q+j;
1040         v[c*q+j]  += S[s+j*p];
1041       }
1042     }
1043   }
1044 
1045   if (ncols)  *ncols  = nz;
1046   if (cols)   *cols   = idx;
1047   if (values) *values = v;
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1052 {
1053   PetscErrorCode ierr;
1054   PetscFunctionBegin;
1055   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1056   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1061 {
1062   PetscErrorCode ierr;
1063   Mat            A;
1064 
1065   PetscFunctionBegin;
1066   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1067   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1068   ierr = MatDestroy(&A);CHKERRQ(ierr);
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 /* ---------------------------------------------------------------------------------- */
1073 /*@C
1074   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1075 
1076     [I \otimes S + A \otimes T]
1077 
1078   where
1079     S is a dense (p \times q) matrix
1080     T is a dense (p \times q) matrix
1081     A is an AIJ  (n \times n) matrix
1082     I is the identity matrix
1083   The resulting matrix is (np \times nq)
1084 
1085   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1086   S is always stored independently on all processes as a PetscScalar array in column-major format.
1087 
1088   Collective
1089 
1090   Input Parameters:
1091 + A - the AIJ matrix
1092 . S - the S matrix, stored as a PetscScalar array (column-major)
1093 . T - the T matrix, stored as a PetscScalar array (column-major)
1094 . p - number of rows in S and T
1095 - q - number of columns in S and T
1096 
1097   Output Parameter:
1098 . kaij - the new KAIJ matrix
1099 
1100   Operations provided:
1101 + MatMult
1102 . MatMultAdd
1103 . MatInvertBlockDiagonal
1104 - MatView
1105 
1106   Level: advanced
1107 
1108 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1109 @*/
1110 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1111 {
1112   PetscErrorCode ierr;
1113   PetscMPIInt    size;
1114 
1115   PetscFunctionBegin;
1116   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1117   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1118   if (size == 1) {
1119     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1120   } else {
1121     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1122   }
1123   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1124   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1125   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1126   ierr = MatSetUp(*kaij);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 /*MC
1131   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1132 
1133     [I \otimes S + A \otimes T]
1134 
1135   where
1136     S is a dense (p \times q) matrix
1137     T is a dense (p \times q) matrix
1138     A is an AIJ  (n \times n) matrix
1139     I is the identity matrix
1140   The resulting matrix is (np \times nq)
1141 
1142   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1143   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
1144 
1145   Operations provided:
1146 + MatMult
1147 . MatMultAdd
1148 . MatInvertBlockDiagonal
1149 - MatView
1150 
1151   Level: advanced
1152 
1153 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1154 M*/
1155 
1156 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1157 {
1158   PetscErrorCode ierr;
1159   Mat_MPIKAIJ    *b;
1160   PetscMPIInt    size;
1161 
1162   PetscFunctionBegin;
1163   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1164   A->data  = (void*)b;
1165 
1166   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1167 
1168   A->ops->setup = MatSetUp_KAIJ;
1169 
1170   b->w    = 0;
1171   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1172   if (size == 1) {
1173     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1174 
1175     A->ops->setup               = MatSetUp_KAIJ;
1176     A->ops->destroy             = MatDestroy_SeqKAIJ;
1177     A->ops->view                = MatView_SeqKAIJ;
1178     A->ops->mult                = MatMult_SeqKAIJ_N;
1179     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1180     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1181     A->ops->getrow              = MatGetRow_SeqKAIJ;
1182     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1183     A->ops->sor                 = MatSOR_SeqKAIJ;
1184   } else {
1185     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1186     A->ops->setup               = MatSetUp_KAIJ;
1187     A->ops->destroy             = MatDestroy_MPIKAIJ;
1188     A->ops->view                = MatView_MPIKAIJ;
1189     A->ops->mult                = MatMult_MPIKAIJ_dof;
1190     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1191     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1192     A->ops->getrow              = MatGetRow_MPIKAIJ;
1193     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1194     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1195   }
1196   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1197   PetscFunctionReturn(0);
1198 }
1199