xref: /petsc/src/mat/impls/kaij/kaij.c (revision 7c9260e7cd07f6008c741433b7c9a01033ac51dc)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_ARG_WRONG,"Matrix passed in is not of type KAIJ");
64   PetscFunctionReturn(0);
65 }
66 
67 /*@C
68    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
69 
70    Not Collective; the entire S is stored and returned independently on all processes.
71 
72    Input Parameter:
73 .  A - the KAIJ matrix
74 
75    Output Parameter:
76 .  S - the S matrix, in form of a scalar array in column-major format
77 
78    Notes:
79    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
80    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S.
81 
82    Level: advanced
83 
84 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
85 @*/
86 PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
87 {
88   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
89   PetscFunctionBegin;
90   *S = b->S;
91   PetscFunctionReturn(0);
92 }
93 
94 /*@C
95    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
96 
97    Not Collective; the entire T is stored and returned independently on all processes
98 
99    Input Parameter:
100 .  A - the KAIJ matrix
101 
102    Output Parameter:
103 .  T - the T matrix, in form of a scalar array in column-major format
104 
105    Notes:
106    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
107    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T.
108 
109    Level: advanced
110 
111 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
112 @*/
113 PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
114 {
115   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
116   PetscFunctionBegin;
117   *T = b->T;
118   PetscFunctionReturn(0);
119 }
120 
121 /*@
122    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
123 
124    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
125 
126    Input Parameters:
127 +  A - the KAIJ matrix
128 -  B - the AIJ matrix
129 
130    Level: advanced
131 
132 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
133 @*/
134 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
135 {
136   PetscErrorCode ierr;
137   PetscMPIInt    size;
138 
139   PetscFunctionBegin;
140   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
141   if (size == 1) {
142     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
143     a->AIJ = B;
144   } else {
145     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
146     a->A = B;
147   }
148   PetscFunctionReturn(0);
149 }
150 
151 /*@C
152    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
153 
154    Logically Collective; the entire S is stored independently on all processes.
155 
156    Input Parameters:
157 +  A - the KAIJ matrix
158 .  p - the number of rows in S
159 .  q - the number of columns in S
160 -  S - the S matrix, in form of a scalar array in column-major format
161 
162    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
163 
164    Level: Advanced
165 
166 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
167 @*/
168 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
169 {
170   PetscErrorCode ierr;
171   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
172 
173   PetscFunctionBegin;
174   ierr = PetscFree(a->S);CHKERRQ(ierr);
175   if (S) {
176     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
177     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
178   } else  a->S = NULL;
179 
180   a->p = p;
181   a->q = q;
182   PetscFunctionReturn(0);
183 }
184 
185 /*@C
186    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
187 
188    Logically Collective; the entire T is stored independently on all processes.
189 
190    Input Parameters:
191 +  A - the KAIJ matrix
192 .  p - the number of rows in S
193 .  q - the number of columns in S
194 -  T - the T matrix, in form of a scalar array in column-major format
195 
196    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
197 
198    Level: Advanced
199 
200 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
201 @*/
202 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
203 {
204   PetscErrorCode ierr;
205   PetscInt       i,j;
206   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
207   PetscBool      isTI = PETSC_FALSE;
208 
209   PetscFunctionBegin;
210   /* check if T is an identity matrix */
211   if (T && (p == q)) {
212     isTI = PETSC_TRUE;
213     for (i=0; i<p; i++) {
214       for (j=0; j<q; j++) {
215         if (i == j) {
216           /* diagonal term must be 1 */
217           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
218         } else {
219           /* off-diagonal term must be 0 */
220           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
221         }
222       }
223     }
224   }
225   a->isTI = isTI;
226 
227   ierr = PetscFree(a->T);CHKERRQ(ierr);
228   if (T && (!isTI)) {
229     ierr = PetscMalloc1(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
230     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
231   } else a->T = NULL;
232 
233   a->p = p;
234   a->q = q;
235   PetscFunctionReturn(0);
236 }
237 
238 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
239 {
240   PetscErrorCode ierr;
241   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
242 
243   PetscFunctionBegin;
244   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
245   ierr = PetscFree(b->S);CHKERRQ(ierr);
246   ierr = PetscFree(b->T);CHKERRQ(ierr);
247   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
248   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
249   ierr = PetscFree(A->data);CHKERRQ(ierr);
250   PetscFunctionReturn(0);
251 }
252 
253 PetscErrorCode MatSetUp_KAIJ(Mat A)
254 {
255   PetscErrorCode ierr;
256   PetscInt       n;
257   PetscMPIInt    size;
258   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
259 
260   PetscFunctionBegin;
261   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
262   if (size == 1) {
263     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
264     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
265     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
266     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
267     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
268     ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr);
269   } else {
270     Mat_MPIKAIJ *a;
271     Mat_MPIAIJ  *mpiaij;
272     IS          from,to;
273     Vec         gvec;
274     PetscScalar *T;
275     PetscInt    i,j;
276 
277     a = (Mat_MPIKAIJ*)A->data;
278     mpiaij = (Mat_MPIAIJ*)a->A->data;
279     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
280     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
281     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
282     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
283     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
284     ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr);
285 
286     if (a->isTI) {
287       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
288        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
289        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
290        * to pass in. */
291       ierr = PetscMalloc1(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
292       for (i=0; i<a->p; i++) {
293         for (j=0; j<a->q; j++) {
294           if (i==j) T[i+j*a->p] = 1.0;
295           else      T[i+j*a->p] = 0.0;
296         }
297       }
298     } else T = a->T;
299     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
300     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
301     ierr = PetscFree(T);CHKERRQ(ierr);
302 
303     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
304     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
305     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
306     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
307     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
308 
309     /* create two temporary Index sets for build scatter gather */
310     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
311     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
312 
313     /* create temporary global vector to generate scatter context */
314     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
315 
316     /* generate the scatter context */
317     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
318 
319     ierr = ISDestroy(&from);CHKERRQ(ierr);
320     ierr = ISDestroy(&to);CHKERRQ(ierr);
321     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
322   }
323 
324   A->assembled = PETSC_TRUE;
325   PetscFunctionReturn(0);
326 }
327 
328 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
329 {
330   PetscErrorCode ierr;
331   Mat            B;
332 
333   PetscFunctionBegin;
334   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
335   ierr = MatView(B,viewer);CHKERRQ(ierr);
336   ierr = MatDestroy(&B);CHKERRQ(ierr);
337   PetscFunctionReturn(0);
338 }
339 
340 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
341 {
342   PetscErrorCode ierr;
343   Mat            B;
344 
345   PetscFunctionBegin;
346   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
347   ierr = MatView(B,viewer);CHKERRQ(ierr);
348   ierr = MatDestroy(&B);CHKERRQ(ierr);
349   PetscFunctionReturn(0);
350 }
351 
352 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
353 {
354   PetscErrorCode ierr;
355   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
356 
357   PetscFunctionBegin;
358   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
359   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
360   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
361   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
362   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
363   ierr = PetscFree(b->S);CHKERRQ(ierr);
364   ierr = PetscFree(b->T);CHKERRQ(ierr);
365   ierr = PetscFree(b->ibdiag);CHKERRQ(ierr);
366   ierr = PetscFree(A->data);CHKERRQ(ierr);
367   PetscFunctionReturn(0);
368 }
369 
370 /* --------------------------------------------------------------------------------------*/
371 
372 /* zz = yy + Axx */
373 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
374 {
375   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
376   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
377   const PetscScalar *s = b->S, *t = b->T;
378   const PetscScalar *x,*v,*bx;
379   PetscScalar       *y,*sums;
380   PetscErrorCode    ierr;
381   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
382   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
383 
384   PetscFunctionBegin;
385   if (!yy) {
386     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
387   } else {
388     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
389   }
390   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
391 
392   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
393   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
394   idx  = a->j;
395   v    = a->a;
396   ii   = a->i;
397 
398   if (b->isTI) {
399     for (i=0; i<m; i++) {
400       jrow = ii[i];
401       n    = ii[i+1] - jrow;
402       sums = y + p*i;
403       for (j=0; j<n; j++) {
404         for (k=0; k<p; k++) {
405           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
406         }
407       }
408     }
409   } else if (t) {
410     for (i=0; i<m; i++) {
411       jrow = ii[i];
412       n    = ii[i+1] - jrow;
413       sums = y + p*i;
414       bx   = x + q*i;
415       for (j=0; j<n; j++) {
416         for (k=0; k<p; k++) {
417           for (l=0; l<q; l++) {
418             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
419           }
420         }
421       }
422     }
423   }
424   if (s) {
425     for (i=0; i<m; i++) {
426       jrow = ii[i];
427       n    = ii[i+1] - jrow;
428       sums = y + p*i;
429       bx   = x + q*i;
430       if (i < b->AIJ->cmap->n) {
431         for (j=0; j<q; j++) {
432           for (k=0; k<p; k++) {
433             sums[k] += s[k+j*p]*bx[j];
434           }
435         }
436       }
437     }
438   }
439 
440   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
441   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
442   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
443   PetscFunctionReturn(0);
444 }
445 
446 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
447 {
448   PetscErrorCode ierr;
449   PetscFunctionBegin;
450   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
451   PetscFunctionReturn(0);
452 }
453 
454 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
455 {
456   PetscErrorCode ierr;
457   PetscFunctionBegin;
458   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
459   PetscFunctionReturn(0);
460 }
461 
462 #include <petsc/private/kernels/blockinvert.h>
463 
464 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
465 {
466   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
467   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
468   const PetscScalar *S  = b->S;
469   const PetscScalar *T  = b->T;
470   const PetscScalar *v  = a->a;
471   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
472   PetscErrorCode    ierr;
473   PetscInt          i,j,*v_pivots,dof,dof2;
474   PetscScalar       *diag,aval,*v_work;
475 
476   PetscFunctionBegin;
477   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
478   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
479 
480   dof  = p;
481   dof2 = dof*dof;
482 
483   if (b->ibdiagvalid) {
484     if (values) *values = b->ibdiag;
485     PetscFunctionReturn(0);
486   }
487   if (!b->ibdiag) {
488     ierr = PetscMalloc1(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
489     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
490   }
491   if (values) *values = b->ibdiag;
492   diag = b->ibdiag;
493 
494   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
495   for (i=0; i<m; i++) {
496     if (S) {
497       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
498     } else {
499       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
500     }
501     if (b->isTI) {
502       aval = 0;
503       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
504       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
505     } else if (T) {
506       aval = 0;
507       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
508       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
509     }
510     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
511     diag += dof2;
512   }
513   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
514 
515   b->ibdiagvalid = PETSC_TRUE;
516   PetscFunctionReturn(0);
517 }
518 
519 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
520 {
521   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
522 
523   PetscFunctionBegin;
524   *B = kaij->AIJ;
525   PetscFunctionReturn(0);
526 }
527 
528 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
529 {
530   PetscErrorCode    ierr;
531   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
532   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
533   const PetscScalar *aa = a->a, *T = kaij->T, *v;
534   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
535   const PetscScalar *b, *xb, *idiag;
536   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
537   PetscInt          i, j, k, i2, bs, bs2, nz;
538 
539   PetscFunctionBegin;
540   its = its*lits;
541   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
542   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
543   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
544   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
545   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
546   else        {bs = p; bs2 = bs*bs; }
547 
548   if (!m) PetscFunctionReturn(0);
549 
550   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
551   idiag = kaij->ibdiag;
552   diag  = a->diag;
553 
554   if (!kaij->sor.setup) {
555     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
556     kaij->sor.setup = PETSC_TRUE;
557   }
558   y     = kaij->sor.y;
559   w     = kaij->sor.w;
560   work  = kaij->sor.work;
561   t     = kaij->sor.t;
562   arr   = kaij->sor.arr;
563 
564   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
565   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
566 
567   if (flag & SOR_ZERO_INITIAL_GUESS) {
568     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
569       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
570       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
571       i2     =  bs;
572       idiag  += bs2;
573       for (i=1; i<m; i++) {
574         v  = aa + ai[i];
575         vi = aj + ai[i];
576         nz = diag[i] - ai[i];
577 
578         if (T) {                /* b - T (Arow * x) */
579           for (k=0; k<bs; k++) w[k] = 0;
580           for (j=0; j<nz; j++) {
581             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
582           }
583           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
584           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
585         } else if (kaij->isTI) {
586           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
587           for (j=0; j<nz; j++) {
588             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
589           }
590         } else {
591           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
592         }
593 
594         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
595         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
596 
597         idiag += bs2;
598         i2    += bs;
599       }
600       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
601       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
602       xb = t;
603     } else xb = b;
604     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
605       idiag = kaij->ibdiag+bs2*(m-1);
606       i2    = bs * (m-1);
607       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
608       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
609       i2    -= bs;
610       idiag -= bs2;
611       for (i=m-2; i>=0; i--) {
612         v  = aa + diag[i] + 1 ;
613         vi = aj + diag[i] + 1;
614         nz = ai[i+1] - diag[i] - 1;
615 
616         if (T) {                /* FIXME: This branch untested */
617           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
618           /* copy all rows of x that are needed into contiguous space */
619           workt = work;
620           for (j=0; j<nz; j++) {
621             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
622             workt += bs;
623           }
624           arrt = arr;
625           for (j=0; j<nz; j++) {
626             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
627             for (k=0; k<bs2; k++) arrt[k] *= v[j];
628             arrt += bs2;
629           }
630           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
631         } else if (kaij->isTI) {
632           for (k=0; k<bs; k++) w[k] = t[i2+k];
633           for (j=0; j<nz; j++) {
634             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
635           }
636         }
637 
638         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
639         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
640 
641         idiag -= bs2;
642         i2    -= bs;
643       }
644       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
645     }
646     its--;
647   }
648   while (its--) {               /* FIXME: This branch not updated */
649     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
650       i2     =  0;
651       idiag  = kaij->ibdiag;
652       for (i=0; i<m; i++) {
653         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
654 
655         v  = aa + ai[i];
656         vi = aj + ai[i];
657         nz = diag[i] - ai[i];
658         workt = work;
659         for (j=0; j<nz; j++) {
660           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
661           workt += bs;
662         }
663         arrt = arr;
664         if (T) {
665           for (j=0; j<nz; j++) {
666             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
667             for (k=0; k<bs2; k++) arrt[k] *= v[j];
668             arrt += bs2;
669           }
670           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
671         } else if (kaij->isTI) {
672           for (j=0; j<nz; j++) {
673             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
674             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
675             arrt += bs2;
676           }
677           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
678         }
679         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
680 
681         v  = aa + diag[i] + 1;
682         vi = aj + diag[i] + 1;
683         nz = ai[i+1] - diag[i] - 1;
684         workt = work;
685         for (j=0; j<nz; j++) {
686           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
687           workt += bs;
688         }
689         arrt = arr;
690         if (T) {
691           for (j=0; j<nz; j++) {
692             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
693             for (k=0; k<bs2; k++) arrt[k] *= v[j];
694             arrt += bs2;
695           }
696           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
697         } else if (kaij->isTI) {
698           for (j=0; j<nz; j++) {
699             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
700             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
701             arrt += bs2;
702           }
703           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
704         }
705 
706         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
707         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
708 
709         idiag += bs2;
710         i2    += bs;
711       }
712       xb = t;
713     }
714     else xb = b;
715     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
716       idiag = kaij->ibdiag+bs2*(m-1);
717       i2    = bs * (m-1);
718       if (xb == b) {
719         for (i=m-1; i>=0; i--) {
720           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
721 
722           v  = aa + ai[i];
723           vi = aj + ai[i];
724           nz = diag[i] - ai[i];
725           workt = work;
726           for (j=0; j<nz; j++) {
727             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
728             workt += bs;
729           }
730           arrt = arr;
731           if (T) {
732             for (j=0; j<nz; j++) {
733               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
734               for (k=0; k<bs2; k++) arrt[k] *= v[j];
735               arrt += bs2;
736             }
737             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
738           } else if (kaij->isTI) {
739             for (j=0; j<nz; j++) {
740               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
741               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
742               arrt += bs2;
743             }
744             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
745           }
746 
747           v  = aa + diag[i] + 1;
748           vi = aj + diag[i] + 1;
749           nz = ai[i+1] - diag[i] - 1;
750           workt = work;
751           for (j=0; j<nz; j++) {
752             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
753             workt += bs;
754           }
755           arrt = arr;
756           if (T) {
757             for (j=0; j<nz; j++) {
758               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
759               for (k=0; k<bs2; k++) arrt[k] *= v[j];
760               arrt += bs2;
761             }
762             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
763           } else if (kaij->isTI) {
764             for (j=0; j<nz; j++) {
765               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
766               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
767               arrt += bs2;
768             }
769             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
770           }
771 
772           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
773           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
774         }
775       } else {
776         for (i=m-1; i>=0; i--) {
777           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
778           v  = aa + diag[i] + 1;
779           vi = aj + diag[i] + 1;
780           nz = ai[i+1] - diag[i] - 1;
781           workt = work;
782           for (j=0; j<nz; j++) {
783             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
784             workt += bs;
785           }
786           arrt = arr;
787           if (T) {
788             for (j=0; j<nz; j++) {
789               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
790               for (k=0; k<bs2; k++) arrt[k] *= v[j];
791               arrt += bs2;
792             }
793             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
794           } else if (kaij->isTI) {
795             for (j=0; j<nz; j++) {
796               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
797               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
798               arrt += bs2;
799             }
800             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
801           }
802           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
803           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
804         }
805         idiag -= bs2;
806         i2    -= bs;
807       }
808       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
809     }
810   }
811 
812   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
813   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
814   PetscFunctionReturn(0);
815 }
816 
817 /*===================================================================================*/
818 
819 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
820 {
821   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
822   PetscErrorCode ierr;
823 
824   PetscFunctionBegin;
825   if (!yy) {
826     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
827   } else {
828     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
829   }
830   /* start the scatter */
831   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
832   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
833   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
834   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
835   PetscFunctionReturn(0);
836 }
837 
838 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
839 {
840   PetscErrorCode ierr;
841   PetscFunctionBegin;
842   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
843   PetscFunctionReturn(0);
844 }
845 
846 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
847 {
848   PetscErrorCode ierr;
849   PetscFunctionBegin;
850   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
851   PetscFunctionReturn(0);
852 }
853 
854 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
855 {
856   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
857   PetscErrorCode  ierr;
858 
859   PetscFunctionBegin;
860   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
864 /* ----------------------------------------------------------------*/
865 
866 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
867 {
868   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
869   PetscErrorCode  diag = PETSC_FALSE;
870   PetscErrorCode  ierr;
871   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
872   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
873 
874   PetscFunctionBegin;
875   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
876   b->getrowactive = PETSC_TRUE;
877   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
878 
879   if ((!S) && (!T) && (!b->isTI)) {
880     if (ncols)    *ncols  = 0;
881     if (cols)     *cols   = NULL;
882     if (values)   *values = NULL;
883     PetscFunctionReturn(0);
884   }
885 
886   if (T || b->isTI) {
887     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
888     c     = nzaij;
889     for (i=0; i<nzaij; i++) {
890       /* check if this row contains a diagonal entry */
891       if (colsaij[i] == r) {
892         diag = PETSC_TRUE;
893         c = i;
894       }
895     }
896   } else nzaij = c = 0;
897 
898   /* calculate size of row */
899   nz = 0;
900   if (S)            nz += q;
901   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
902 
903   if (cols || values) {
904     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
905     for (i=0; i<q; i++) {
906       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
907       v[i] = 0.0;
908     }
909     if (b->isTI) {
910       for (i=0; i<nzaij; i++) {
911         for (j=0; j<q; j++) {
912           idx[i*q+j] = colsaij[i]*q+j;
913           v[i*q+j]   = (j==s ? vaij[i] : 0);
914         }
915       }
916     } else if (T) {
917       for (i=0; i<nzaij; i++) {
918         for (j=0; j<q; j++) {
919           idx[i*q+j] = colsaij[i]*q+j;
920           v[i*q+j]   = vaij[i]*T[s+j*p];
921         }
922       }
923     }
924     if (S) {
925       for (j=0; j<q; j++) {
926         idx[c*q+j] = r*q+j;
927         v[c*q+j]  += S[s+j*p];
928       }
929     }
930   }
931 
932   if (ncols)    *ncols  = nz;
933   if (cols)     *cols   = idx;
934   if (values)   *values = v;
935   PetscFunctionReturn(0);
936 }
937 
938 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
939 {
940   PetscErrorCode ierr;
941   PetscFunctionBegin;
942   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
943   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
944   PetscFunctionReturn(0);
945 }
946 
947 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
948 {
949   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
950   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
951   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
952   Mat             AIJ     = b->A;
953   PetscBool       diag    = PETSC_FALSE;
954   PetscErrorCode  ierr;
955   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
956   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
957   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
958 
959   PetscFunctionBegin;
960   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
961   b->getrowactive = PETSC_TRUE;
962   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
963   lrow = row - rstart;
964 
965   if ((!S) && (!T) && (!b->isTI)) {
966     if (ncols)    *ncols  = 0;
967     if (cols)     *cols   = NULL;
968     if (values)   *values = NULL;
969     PetscFunctionReturn(0);
970   }
971 
972   r = lrow/p;
973   s = lrow%p;
974 
975   if (T || b->isTI) {
976     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
977     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
978     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
979 
980     c     = ncolsaij + ncolsoaij;
981     for (i=0; i<ncolsaij; i++) {
982       /* check if this row contains a diagonal entry */
983       if (colsaij[i] == r) {
984         diag = PETSC_TRUE;
985         c = i;
986       }
987     }
988   } else c = 0;
989 
990   /* calculate size of row */
991   nz = 0;
992   if (S)            nz += q;
993   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
994 
995   if (cols || values) {
996     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
997     for (i=0; i<q; i++) {
998       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
999       v[i] = 0.0;
1000     }
1001     if (b->isTI) {
1002       for (i=0; i<ncolsaij; i++) {
1003         for (j=0; j<q; j++) {
1004           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1005           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1006         }
1007       }
1008       for (i=0; i<ncolsoaij; i++) {
1009         for (j=0; j<q; j++) {
1010           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1011           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1012         }
1013       }
1014     } else if (T) {
1015       for (i=0; i<ncolsaij; i++) {
1016         for (j=0; j<q; j++) {
1017           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1018           v[i*q+j]   = vals[i]*T[s+j*p];
1019         }
1020       }
1021       for (i=0; i<ncolsoaij; i++) {
1022         for (j=0; j<q; j++) {
1023           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1024           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1025         }
1026       }
1027     }
1028     if (S) {
1029       for (j=0; j<q; j++) {
1030         idx[c*q+j] = (r+rstart/p)*q+j;
1031         v[c*q+j]  += S[s+j*p];
1032       }
1033     }
1034   }
1035 
1036   if (ncols)  *ncols  = nz;
1037   if (cols)   *cols   = idx;
1038   if (values) *values = v;
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1043 {
1044   PetscErrorCode ierr;
1045   PetscFunctionBegin;
1046   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1047   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1052 {
1053   PetscErrorCode ierr;
1054   Mat            A;
1055 
1056   PetscFunctionBegin;
1057   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1058   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1059   ierr = MatDestroy(&A);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
1062 
1063 /* ---------------------------------------------------------------------------------- */
1064 /*@C
1065   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1066 
1067     [I \otimes S + A \otimes T]
1068 
1069   where
1070     S is a dense (p \times q) matrix
1071     T is a dense (p \times q) matrix
1072     A is an AIJ  (n \times n) matrix
1073     I is the identity matrix
1074   The resulting matrix is (np \times nq)
1075 
1076   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1077   S is always stored independently on all processes as a PetscScalar array in column-major format.
1078 
1079   Collective
1080 
1081   Input Parameters:
1082 + A - the AIJ matrix
1083 . S - the S matrix, stored as a PetscScalar array (column-major)
1084 . T - the T matrix, stored as a PetscScalar array (column-major)
1085 . p - number of rows in S and T
1086 - q - number of columns in S and T
1087 
1088   Output Parameter:
1089 . kaij - the new KAIJ matrix
1090 
1091   Operations provided:
1092 + MatMult
1093 . MatMultAdd
1094 . MatInvertBlockDiagonal
1095 - MatView
1096 
1097   Level: advanced
1098 
1099 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1100 @*/
1101 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1102 {
1103   PetscErrorCode ierr;
1104   PetscMPIInt    size;
1105 
1106   PetscFunctionBegin;
1107   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1108   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1109   if (size == 1) {
1110     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1111   } else {
1112     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1113   }
1114   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1115   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1116   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1117   ierr = MatSetUp(*kaij);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 /*MC
1122   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1123 
1124     [I \otimes S + A \otimes T]
1125 
1126   where
1127     S is a dense (p \times q) matrix
1128     T is a dense (p \times q) matrix
1129     A is an AIJ  (n \times n) matrix
1130     I is the identity matrix
1131   The resulting matrix is (np \times nq)
1132 
1133   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1134   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
1135 
1136   Operations provided:
1137 + MatMult
1138 . MatMultAdd
1139 . MatInvertBlockDiagonal
1140 - MatView
1141 
1142   Level: advanced
1143 
1144 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1145 M*/
1146 
1147 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1148 {
1149   PetscErrorCode ierr;
1150   Mat_MPIKAIJ    *b;
1151   PetscMPIInt    size;
1152 
1153   PetscFunctionBegin;
1154   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1155   A->data  = (void*)b;
1156 
1157   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1158 
1159   A->ops->setup = MatSetUp_KAIJ;
1160 
1161   b->w    = 0;
1162   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1163   if (size == 1) {
1164     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1165     A->ops->setup               = MatSetUp_KAIJ;
1166     A->ops->destroy             = MatDestroy_SeqKAIJ;
1167     A->ops->view                = MatView_SeqKAIJ;
1168     A->ops->mult                = MatMult_SeqKAIJ_N;
1169     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1170     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1171     A->ops->getrow              = MatGetRow_SeqKAIJ;
1172     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1173     A->ops->sor                 = MatSOR_SeqKAIJ;
1174   } else {
1175     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1176     A->ops->setup               = MatSetUp_KAIJ;
1177     A->ops->destroy             = MatDestroy_MPIKAIJ;
1178     A->ops->view                = MatView_MPIKAIJ;
1179     A->ops->mult                = MatMult_MPIKAIJ_dof;
1180     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1181     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1182     A->ops->getrow              = MatGetRow_MPIKAIJ;
1183     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1184     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1185   }
1186   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1187   PetscFunctionReturn(0);
1188 }
1189