xref: /petsc/src/mat/impls/kaij/kaij.c (revision a84f80695ca69670cd9a132d82ad57a724a655b8)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else {
64     *B = A;
65   }
66   PetscFunctionReturn(0);
67 }
68 
69 /*@C
70    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
71 
72    Not Collective; the entire S is stored and returned independently on all processes.
73 
74    Input Parameter:
75 .  A - the KAIJ matrix
76 
77    Output Parameter:
78 .  S - the S matrix, in form of a scalar array in column-major format
79 
80    Notes:
81    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
82    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S.
83 
84    Level: advanced
85 
86 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
87 @*/
88 PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
89 {
90   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
91   PetscFunctionBegin;
92   *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
98 
99    Not Collective; the entire T is stored and returned independently on all processes
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameter:
105 .  T - the T matrix, in form of a scalar array in column-major format
106 
107    Notes:
108    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
109    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T.
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   *T = b->T;
120   PetscFunctionReturn(0);
121 }
122 
123 /*@
124    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
125 
126    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
127 
128    Input Parameters:
129 +  A - the KAIJ matrix
130 -  B - the AIJ matrix
131 
132    Level: advanced
133 
134 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
135 @*/
136 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
137 {
138   PetscErrorCode ierr;
139   PetscMPIInt    size;
140 
141   PetscFunctionBegin;
142   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
143   if (size == 1) {
144     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
145     a->AIJ = B;
146   } else {
147     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
148     a->A = B;
149   }
150   PetscFunctionReturn(0);
151 }
152 
153 /*@C
154    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
155 
156    Logically Collective; the entire S is stored independently on all processes.
157 
158    Input Parameters:
159 +  A - the KAIJ matrix
160 .  p - the number of rows in S
161 .  q - the number of columns in S
162 -  S - the S matrix, in form of a scalar array in column-major format
163 
164    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
165 
166    Level: Advanced
167 
168 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
169 @*/
170 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
171 {
172   PetscErrorCode ierr;
173   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   if (a->S) {
177     ierr = PetscFree(a->S);CHKERRQ(ierr);
178   }
179   if (S) {
180     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
181     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
182   } else  a->S = NULL;
183 
184   a->p = p;
185   a->q = q;
186 
187   PetscFunctionReturn(0);
188 }
189 
190 /*@C
191    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
192 
193    Logically Collective; the entire T is stored independently on all processes.
194 
195    Input Parameters:
196 +  A - the KAIJ matrix
197 .  p - the number of rows in S
198 .  q - the number of columns in S
199 -  T - the T matrix, in form of a scalar array in column-major format
200 
201    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
202 
203    Level: Advanced
204 
205 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
206 @*/
207 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
208 {
209   PetscErrorCode ierr;
210   PetscInt       i,j;
211   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
212   PetscBool      isTI = PETSC_FALSE;
213 
214   PetscFunctionBegin;
215   /* check if T is an identity matrix */
216   if (T && (p == q)) {
217     isTI = PETSC_TRUE;
218     for (i=0; i<p; i++) {
219       for (j=0; j<q; j++) {
220         if (i == j) {
221           /* diagonal term must be 1 */
222           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
223         } else {
224           /* off-diagonal term must be 0 */
225           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
226         }
227       }
228     }
229   }
230   a->isTI = isTI;
231 
232   if (a->T) {
233     ierr = PetscFree(a->T);CHKERRQ(ierr);
234   }
235   if (T && (!isTI)) {
236     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
237     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
238   } else a->T = NULL;
239 
240   a->p = p;
241   a->q = q;
242   PetscFunctionReturn(0);
243 }
244 
245 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
246 {
247   PetscErrorCode ierr;
248   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
249 
250   PetscFunctionBegin;
251   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
252   ierr = PetscFree(b->S);CHKERRQ(ierr);
253   ierr = PetscFree(b->T);CHKERRQ(ierr);
254   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
255   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
256   ierr = PetscFree(A->data);CHKERRQ(ierr);
257   PetscFunctionReturn(0);
258 }
259 
260 PetscErrorCode MatSetUp_KAIJ(Mat A)
261 {
262   PetscErrorCode ierr;
263   PetscInt       n;
264   PetscMPIInt    size;
265   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
266 
267   PetscFunctionBegin;
268   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
269   if (size == 1) {
270     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
271     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
272     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
273     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
274     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
275     ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr);
276   } else {
277     Mat_MPIKAIJ *a;
278     Mat_MPIAIJ  *mpiaij;
279     IS          from,to;
280     Vec         gvec;
281     PetscScalar *T;
282     PetscInt    i,j;
283 
284     a = (Mat_MPIKAIJ*)A->data;
285     mpiaij = (Mat_MPIAIJ*)a->A->data;
286     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
287     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
288     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
289     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
290     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
291     ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr);
292 
293     if (a->isTI) {
294       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
295        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
296        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
297        * to pass in. */
298       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
299       for (i=0; i<a->p; i++) {
300         for (j=0; j<a->q; j++) {
301           if (i==j) T[i+j*a->p] = 1.0;
302           else      T[i+j*a->p] = 0.0;
303         }
304       }
305     } else T = a->T;
306     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
307     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
308     if (a->isTI) {
309       ierr = PetscFree(T);CHKERRQ(ierr);
310     }
311 
312     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
313     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
314     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
315     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
316     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
317 
318     /* create two temporary Index sets for build scatter gather */
319     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
320     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
321 
322     /* create temporary global vector to generate scatter context */
323     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
324 
325     /* generate the scatter context */
326     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
327 
328     ierr = ISDestroy(&from);CHKERRQ(ierr);
329     ierr = ISDestroy(&to);CHKERRQ(ierr);
330     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
331   }
332 
333   A->assembled = PETSC_TRUE;
334   PetscFunctionReturn(0);
335 }
336 
337 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
338 {
339   PetscErrorCode ierr;
340   Mat            B;
341 
342   PetscFunctionBegin;
343   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
344   ierr = MatView(B,viewer);CHKERRQ(ierr);
345   ierr = MatDestroy(&B);CHKERRQ(ierr);
346   PetscFunctionReturn(0);
347 }
348 
349 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
350 {
351   PetscErrorCode ierr;
352   Mat            B;
353 
354   PetscFunctionBegin;
355   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
356   ierr = MatView(B,viewer);CHKERRQ(ierr);
357   ierr = MatDestroy(&B);CHKERRQ(ierr);
358   PetscFunctionReturn(0);
359 }
360 
361 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
362 {
363   PetscErrorCode ierr;
364   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
365 
366   PetscFunctionBegin;
367   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
368   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
369   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
370   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
371   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
372   ierr = PetscFree(b->S);CHKERRQ(ierr);
373   ierr = PetscFree(b->T);CHKERRQ(ierr);
374   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
375   ierr = PetscFree(A->data);CHKERRQ(ierr);
376   PetscFunctionReturn(0);
377 }
378 
379 
380 /* --------------------------------------------------------------------------------------*/
381 
382 /* zz = yy + Axx */
383 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
384 {
385   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
386   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
387   const PetscScalar *s = b->S, *t = b->T;
388   const PetscScalar *x,*v,*bx;
389   PetscScalar       *y,*sums;
390   PetscErrorCode    ierr;
391   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
392   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
393 
394   PetscFunctionBegin;
395   if (!yy) {
396     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
397   } else {
398     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
399   }
400   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
401 
402   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
403   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
404   idx  = a->j;
405   v    = a->a;
406   ii   = a->i;
407 
408   if (b->isTI) {
409     for (i=0; i<m; i++) {
410       jrow = ii[i];
411       n    = ii[i+1] - jrow;
412       sums = y + p*i;
413       for (j=0; j<n; j++) {
414         for (k=0; k<p; k++) {
415           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
416         }
417       }
418     }
419   } else if (t) {
420     for (i=0; i<m; i++) {
421       jrow = ii[i];
422       n    = ii[i+1] - jrow;
423       sums = y + p*i;
424       bx   = x + q*i;
425       for (j=0; j<n; j++) {
426         for (k=0; k<p; k++) {
427           for (l=0; l<q; l++) {
428             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
429           }
430         }
431       }
432     }
433   }
434   if (s) {
435     for (i=0; i<m; i++) {
436       jrow = ii[i];
437       n    = ii[i+1] - jrow;
438       sums = y + p*i;
439       bx   = x + q*i;
440       if (i < b->AIJ->cmap->n) {
441         for (j=0; j<q; j++) {
442           for (k=0; k<p; k++) {
443             sums[k] += s[k+j*p]*bx[j];
444           }
445         }
446       }
447     }
448   }
449 
450   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
451   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
452   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
453   PetscFunctionReturn(0);
454 }
455 
456 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
457 {
458   PetscErrorCode ierr;
459   PetscFunctionBegin;
460   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
461   PetscFunctionReturn(0);
462 }
463 
464 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
465 {
466   PetscErrorCode ierr;
467   PetscFunctionBegin;
468   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
469   PetscFunctionReturn(0);
470 }
471 
472 #include <petsc/private/kernels/blockinvert.h>
473 
474 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
475 {
476   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
477   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
478   const PetscScalar *S  = b->S;
479   const PetscScalar *T  = b->T;
480   const PetscScalar *v  = a->a;
481   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
482   PetscErrorCode    ierr;
483   PetscInt          i,j,*v_pivots,dof,dof2;
484   PetscScalar       *diag,aval,*v_work;
485 
486   PetscFunctionBegin;
487   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
488   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
489 
490   dof  = p;
491   dof2 = dof*dof;
492 
493   if (b->ibdiagvalid) {
494     if (values) *values = b->ibdiag;
495     PetscFunctionReturn(0);
496   }
497   if (!b->ibdiag) {
498     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
499     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
500   }
501   if (values) *values = b->ibdiag;
502   diag = b->ibdiag;
503 
504   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
505   for (i=0; i<m; i++) {
506     if (S) {
507       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
508     } else {
509       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
510     }
511     if (b->isTI) {
512       aval = 0;
513       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
514       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
515     } else if (T) {
516       aval = 0;
517       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
518       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
519     }
520     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
521     diag += dof2;
522   }
523   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
524 
525   b->ibdiagvalid = PETSC_TRUE;
526   PetscFunctionReturn(0);
527 }
528 
529 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
530 {
531   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
532 
533   PetscFunctionBegin;
534   *B = kaij->AIJ;
535   PetscFunctionReturn(0);
536 }
537 
538 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
539 {
540   PetscErrorCode    ierr;
541   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
542   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
543   const PetscScalar *aa = a->a, *T = kaij->T, *v;
544   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
545   const PetscScalar *b, *xb, *idiag;
546   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
547   PetscInt          i, j, k, i2, bs, bs2, nz;
548 
549   PetscFunctionBegin;
550   its = its*lits;
551   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
552   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
553   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
554   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
555   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
556   else        {bs = p; bs2 = bs*bs; }
557 
558   if (!m) PetscFunctionReturn(0);
559 
560   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
561   idiag = kaij->ibdiag;
562   diag  = a->diag;
563 
564   if (!kaij->sor.setup) {
565     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
566     kaij->sor.setup = PETSC_TRUE;
567   }
568   y     = kaij->sor.y;
569   w     = kaij->sor.w;
570   work  = kaij->sor.work;
571   t     = kaij->sor.t;
572   arr   = kaij->sor.arr;
573 
574   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
575   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
576 
577   if (flag & SOR_ZERO_INITIAL_GUESS) {
578     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
579       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
580       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
581       i2     =  bs;
582       idiag  += bs2;
583       for (i=1; i<m; i++) {
584         v  = aa + ai[i];
585         vi = aj + ai[i];
586         nz = diag[i] - ai[i];
587 
588         if (T) {                /* b - T (Arow * x) */
589           for (k=0; k<bs; k++) w[k] = 0;
590           for (j=0; j<nz; j++) {
591             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
592           }
593           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
594           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
595         } else if (kaij->isTI) {
596           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
597           for (j=0; j<nz; j++) {
598             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
599           }
600         } else {
601           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
602         }
603 
604         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
605         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
606 
607         idiag += bs2;
608         i2    += bs;
609       }
610       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
611       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
612       xb = t;
613     } else xb = b;
614     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
615       idiag = kaij->ibdiag+bs2*(m-1);
616       i2    = bs * (m-1);
617       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
618       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
619       i2    -= bs;
620       idiag -= bs2;
621       for (i=m-2; i>=0; i--) {
622         v  = aa + diag[i] + 1 ;
623         vi = aj + diag[i] + 1;
624         nz = ai[i+1] - diag[i] - 1;
625 
626         if (T) {                /* FIXME: This branch untested */
627           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
628           /* copy all rows of x that are needed into contiguous space */
629           workt = work;
630           for (j=0; j<nz; j++) {
631             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
632             workt += bs;
633           }
634           arrt = arr;
635           for (j=0; j<nz; j++) {
636             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
637             for (k=0; k<bs2; k++) arrt[k] *= v[j];
638             arrt += bs2;
639           }
640           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
641         } else if (kaij->isTI) {
642           for (k=0; k<bs; k++) w[k] = t[i2+k];
643           for (j=0; j<nz; j++) {
644             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
645           }
646         }
647 
648         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
649         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
650 
651         idiag -= bs2;
652         i2    -= bs;
653       }
654       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
655     }
656     its--;
657   }
658   while (its--) {               /* FIXME: This branch not updated */
659     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
660       i2     =  0;
661       idiag  = kaij->ibdiag;
662       for (i=0; i<m; i++) {
663         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
664 
665         v  = aa + ai[i];
666         vi = aj + ai[i];
667         nz = diag[i] - ai[i];
668         workt = work;
669         for (j=0; j<nz; j++) {
670           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
671           workt += bs;
672         }
673         arrt = arr;
674         if (T) {
675           for (j=0; j<nz; j++) {
676             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
677             for (k=0; k<bs2; k++) arrt[k] *= v[j];
678             arrt += bs2;
679           }
680           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
681         } else if (kaij->isTI) {
682           for (j=0; j<nz; j++) {
683             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
684             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
685             arrt += bs2;
686           }
687           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
688         }
689         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
690 
691         v  = aa + diag[i] + 1;
692         vi = aj + diag[i] + 1;
693         nz = ai[i+1] - diag[i] - 1;
694         workt = work;
695         for (j=0; j<nz; j++) {
696           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
697           workt += bs;
698         }
699         arrt = arr;
700         if (T) {
701           for (j=0; j<nz; j++) {
702             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
703             for (k=0; k<bs2; k++) arrt[k] *= v[j];
704             arrt += bs2;
705           }
706           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
707         } else if (kaij->isTI) {
708           for (j=0; j<nz; j++) {
709             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
710             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
711             arrt += bs2;
712           }
713           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
714         }
715 
716         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
717         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
718 
719         idiag += bs2;
720         i2    += bs;
721       }
722       xb = t;
723     }
724     else xb = b;
725     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
726       idiag = kaij->ibdiag+bs2*(m-1);
727       i2    = bs * (m-1);
728       if (xb == b) {
729         for (i=m-1; i>=0; i--) {
730           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
731 
732           v  = aa + ai[i];
733           vi = aj + ai[i];
734           nz = diag[i] - ai[i];
735           workt = work;
736           for (j=0; j<nz; j++) {
737             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
738             workt += bs;
739           }
740           arrt = arr;
741           if (T) {
742             for (j=0; j<nz; j++) {
743               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
744               for (k=0; k<bs2; k++) arrt[k] *= v[j];
745               arrt += bs2;
746             }
747             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
748           } else if (kaij->isTI) {
749             for (j=0; j<nz; j++) {
750               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
751               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
752               arrt += bs2;
753             }
754             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
755           }
756 
757           v  = aa + diag[i] + 1;
758           vi = aj + diag[i] + 1;
759           nz = ai[i+1] - diag[i] - 1;
760           workt = work;
761           for (j=0; j<nz; j++) {
762             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
763             workt += bs;
764           }
765           arrt = arr;
766           if (T) {
767             for (j=0; j<nz; j++) {
768               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
769               for (k=0; k<bs2; k++) arrt[k] *= v[j];
770               arrt += bs2;
771             }
772             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
773           } else if (kaij->isTI) {
774             for (j=0; j<nz; j++) {
775               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
776               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
777               arrt += bs2;
778             }
779             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
780           }
781 
782           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
783           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
784         }
785       } else {
786         for (i=m-1; i>=0; i--) {
787           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
788           v  = aa + diag[i] + 1;
789           vi = aj + diag[i] + 1;
790           nz = ai[i+1] - diag[i] - 1;
791           workt = work;
792           for (j=0; j<nz; j++) {
793             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
794             workt += bs;
795           }
796           arrt = arr;
797           if (T) {
798             for (j=0; j<nz; j++) {
799               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
800               for (k=0; k<bs2; k++) arrt[k] *= v[j];
801               arrt += bs2;
802             }
803             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
804           } else if (kaij->isTI) {
805             for (j=0; j<nz; j++) {
806               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
807               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
808               arrt += bs2;
809             }
810             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
811           }
812           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
813           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
814         }
815         idiag -= bs2;
816         i2    -= bs;
817       }
818       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
819     }
820   }
821 
822   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
823   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 /*===================================================================================*/
828 
829 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
830 {
831   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
832   PetscErrorCode ierr;
833 
834   PetscFunctionBegin;
835   if (!yy) {
836     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
837   } else {
838     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
839   }
840   /* start the scatter */
841   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
842   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
843   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
844   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
845   PetscFunctionReturn(0);
846 }
847 
848 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
849 {
850   PetscErrorCode ierr;
851   PetscFunctionBegin;
852   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
853   PetscFunctionReturn(0);
854 }
855 
856 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
857 {
858   PetscErrorCode ierr;
859   PetscFunctionBegin;
860   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
865 {
866   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
867   PetscErrorCode  ierr;
868 
869   PetscFunctionBegin;
870   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 /* ----------------------------------------------------------------*/
875 
876 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
877 {
878   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
879   PetscErrorCode  diag = PETSC_FALSE;
880   PetscErrorCode  ierr;
881   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
882   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
883 
884   PetscFunctionBegin;
885   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
886   b->getrowactive = PETSC_TRUE;
887   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
888 
889   if ((!S) && (!T) && (!b->isTI)) {
890     if (ncols)    *ncols  = 0;
891     if (cols)     *cols   = NULL;
892     if (values)   *values = NULL;
893     PetscFunctionReturn(0);
894   }
895 
896   if (T || b->isTI) {
897     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
898     c     = nzaij;
899     for (i=0; i<nzaij; i++) {
900       /* check if this row contains a diagonal entry */
901       if (colsaij[i] == r) {
902         diag = PETSC_TRUE;
903         c = i;
904       }
905     }
906   } else nzaij = c = 0;
907 
908   /* calculate size of row */
909   nz = 0;
910   if (S)            nz += q;
911   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
912 
913   if (cols || values) {
914     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
915     for (i=0; i<q; i++) {
916       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
917       v[i] = 0.0;
918     }
919     if (b->isTI) {
920       for (i=0; i<nzaij; i++) {
921         for (j=0; j<q; j++) {
922           idx[i*q+j] = colsaij[i]*q+j;
923           v[i*q+j]   = (j==s ? vaij[i] : 0);
924         }
925       }
926     } else if (T) {
927       for (i=0; i<nzaij; i++) {
928         for (j=0; j<q; j++) {
929           idx[i*q+j] = colsaij[i]*q+j;
930           v[i*q+j]   = vaij[i]*T[s+j*p];
931         }
932       }
933     }
934     if (S) {
935       for (j=0; j<q; j++) {
936         idx[c*q+j] = r*q+j;
937         v[c*q+j]  += S[s+j*p];
938       }
939     }
940   }
941 
942   if (ncols)    *ncols  = nz;
943   if (cols)     *cols   = idx;
944   if (values)   *values = v;
945 
946   PetscFunctionReturn(0);
947 }
948 
949 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
950 {
951   PetscErrorCode ierr;
952   PetscFunctionBegin;
953   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
954   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
955   PetscFunctionReturn(0);
956 }
957 
958 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
959 {
960   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
961   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
962   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
963   Mat             AIJ     = b->A;
964   PetscBool       diag    = PETSC_FALSE;
965   PetscErrorCode  ierr;
966   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
967   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
968   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
969 
970   PetscFunctionBegin;
971   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
972   b->getrowactive = PETSC_TRUE;
973   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
974   lrow = row - rstart;
975 
976   if ((!S) && (!T) && (!b->isTI)) {
977     if (ncols)    *ncols  = 0;
978     if (cols)     *cols   = NULL;
979     if (values)   *values = NULL;
980     PetscFunctionReturn(0);
981   }
982 
983   r = lrow/p;
984   s = lrow%p;
985 
986   if (T || b->isTI) {
987     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
988     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
989     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
990 
991     c     = ncolsaij + ncolsoaij;
992     for (i=0; i<ncolsaij; i++) {
993       /* check if this row contains a diagonal entry */
994       if (colsaij[i] == r) {
995         diag = PETSC_TRUE;
996         c = i;
997       }
998     }
999   } else c = 0;
1000 
1001   /* calculate size of row */
1002   nz = 0;
1003   if (S)            nz += q;
1004   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1005 
1006   if (cols || values) {
1007     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1008     for (i=0; i<q; i++) {
1009       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1010       v[i] = 0.0;
1011     }
1012     if (b->isTI) {
1013       for (i=0; i<ncolsaij; i++) {
1014         for (j=0; j<q; j++) {
1015           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1016           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1017         }
1018       }
1019       for (i=0; i<ncolsoaij; i++) {
1020         for (j=0; j<q; j++) {
1021           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1022           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1023         }
1024       }
1025     } else if (T) {
1026       for (i=0; i<ncolsaij; i++) {
1027         for (j=0; j<q; j++) {
1028           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1029           v[i*q+j]   = vals[i]*T[s+j*p];
1030         }
1031       }
1032       for (i=0; i<ncolsoaij; i++) {
1033         for (j=0; j<q; j++) {
1034           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1035           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1036         }
1037       }
1038     }
1039     if (S) {
1040       for (j=0; j<q; j++) {
1041         idx[c*q+j] = (r+rstart/p)*q+j;
1042         v[c*q+j]  += S[s+j*p];
1043       }
1044     }
1045   }
1046 
1047   if (ncols)  *ncols  = nz;
1048   if (cols)   *cols   = idx;
1049   if (values) *values = v;
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1054 {
1055   PetscErrorCode ierr;
1056   PetscFunctionBegin;
1057   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1058   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1063 {
1064   PetscErrorCode ierr;
1065   Mat            A;
1066 
1067   PetscFunctionBegin;
1068   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1069   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1070   ierr = MatDestroy(&A);CHKERRQ(ierr);
1071   PetscFunctionReturn(0);
1072 }
1073 
1074 /* ---------------------------------------------------------------------------------- */
1075 /*@C
1076   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1077 
1078     [I \otimes S + A \otimes T]
1079 
1080   where
1081     S is a dense (p \times q) matrix
1082     T is a dense (p \times q) matrix
1083     A is an AIJ  (n \times n) matrix
1084     I is the identity matrix
1085   The resulting matrix is (np \times nq)
1086 
1087   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1088   S is always stored independently on all processes as a PetscScalar array in column-major format.
1089 
1090   Collective
1091 
1092   Input Parameters:
1093 + A - the AIJ matrix
1094 . S - the S matrix, stored as a PetscScalar array (column-major)
1095 . T - the T matrix, stored as a PetscScalar array (column-major)
1096 . p - number of rows in S and T
1097 - q - number of columns in S and T
1098 
1099   Output Parameter:
1100 . kaij - the new KAIJ matrix
1101 
1102   Operations provided:
1103 + MatMult
1104 . MatMultAdd
1105 . MatInvertBlockDiagonal
1106 - MatView
1107 
1108   Level: advanced
1109 
1110 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1111 @*/
1112 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1113 {
1114   PetscErrorCode ierr;
1115   PetscMPIInt    size;
1116 
1117   PetscFunctionBegin;
1118   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1119   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1120   if (size == 1) {
1121     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1122   } else {
1123     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1124   }
1125   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1126   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1127   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1128   ierr = MatSetUp(*kaij);
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 /*MC
1133   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1134 
1135     [I \otimes S + A \otimes T]
1136 
1137   where
1138     S is a dense (p \times q) matrix
1139     T is a dense (p \times q) matrix
1140     A is an AIJ  (n \times n) matrix
1141     I is the identity matrix
1142   The resulting matrix is (np \times nq)
1143 
1144   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1145   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
1146 
1147   Operations provided:
1148 + MatMult
1149 . MatMultAdd
1150 . MatInvertBlockDiagonal
1151 - MatView
1152 
1153   Level: advanced
1154 
1155 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1156 M*/
1157 
1158 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1159 {
1160   PetscErrorCode ierr;
1161   Mat_MPIKAIJ    *b;
1162   PetscMPIInt    size;
1163 
1164   PetscFunctionBegin;
1165   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1166   A->data  = (void*)b;
1167 
1168   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1169 
1170   A->ops->setup = MatSetUp_KAIJ;
1171 
1172   b->w    = 0;
1173   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1174   if (size == 1) {
1175     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1176 
1177     A->ops->setup               = MatSetUp_KAIJ;
1178     A->ops->destroy             = MatDestroy_SeqKAIJ;
1179     A->ops->view                = MatView_SeqKAIJ;
1180     A->ops->mult                = MatMult_SeqKAIJ_N;
1181     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1182     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1183     A->ops->getrow              = MatGetRow_SeqKAIJ;
1184     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1185     A->ops->sor                 = MatSOR_SeqKAIJ;
1186   } else {
1187     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1188     A->ops->setup               = MatSetUp_KAIJ;
1189     A->ops->destroy             = MatDestroy_MPIKAIJ;
1190     A->ops->view                = MatView_MPIKAIJ;
1191     A->ops->mult                = MatMult_MPIKAIJ_dof;
1192     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1193     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1194     A->ops->getrow              = MatGetRow_MPIKAIJ;
1195     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1196     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1197   }
1198   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1199   PetscFunctionReturn(0);
1200 }
1201