xref: /petsc/src/mat/impls/kaij/kaij.c (revision fc64b2cf3ddd6035d052b7747f5b776a85846f02)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else {
64     *B = A;
65   }
66   PetscFunctionReturn(0);
67 }
68 
69 /*@C
70    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
71 
72    Not Collective; the entire S is stored and returned independently on all processes.
73 
74    Input Parameter:
75 .  A - the KAIJ matrix
76 
77    Output Parameter:
78 .  S - the S matrix, in form of a scalar array in column-major format
79 
80    Notes:
81    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
82    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S.
83 
84    Level: advanced
85 
86 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
87 @*/
88 PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
89 {
90   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
91   PetscFunctionBegin;
92   *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
98 
99    Not Collective; the entire T is stored and returned independently on all processes
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameter:
105 .  T - the T matrix, in form of a scalar array in column-major format
106 
107    Notes:
108    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
109    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T.
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   *T = b->T;
120   PetscFunctionReturn(0);
121 }
122 
123 /*@
124    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
125 
126    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
127 
128    Input Parameters:
129 +  A - the KAIJ matrix
130 -  B - the AIJ matrix
131 
132    Level: advanced
133 
134 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
135 @*/
136 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
137 {
138   PetscErrorCode ierr;
139   PetscMPIInt    size;
140 
141   PetscFunctionBegin;
142   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
143   if (size == 1) {
144     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
145     a->AIJ = B;
146   } else {
147     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
148     a->A = B;
149   }
150   PetscFunctionReturn(0);
151 }
152 
153 /*@C
154    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
155 
156    Logically Collective; the entire S is stored independently on all processes.
157 
158    Input Parameters:
159 +  A - the KAIJ matrix
160 .  p - the number of rows in S
161 .  q - the number of columns in S
162 -  S - the S matrix, in form of a scalar array in column-major format
163 
164    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
165 
166    Level: Advanced
167 
168 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
169 @*/
170 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
171 {
172   PetscErrorCode ierr;
173   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   if (a->S) {
177     ierr = PetscFree(a->S);CHKERRQ(ierr);
178   }
179   if (S) {
180     ierr = PetscMalloc(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
181     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
182   } else  a->S = NULL;
183 
184   a->p = p;
185   a->q = q;
186 
187   PetscFunctionReturn(0);
188 }
189 
190 /*@C
191    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
192 
193    Logically Collective; the entire T is stored independently on all processes.
194 
195    Input Parameters:
196 +  A - the KAIJ matrix
197 .  p - the number of rows in S
198 .  q - the number of columns in S
199 -  T - the T matrix, in form of a scalar array in column-major format
200 
201    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
202 
203    Level: Advanced
204 
205 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
206 @*/
207 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
208 {
209   PetscErrorCode ierr;
210   PetscInt       i,j;
211   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
212   PetscBool      isTI = PETSC_FALSE;
213 
214   PetscFunctionBegin;
215   /* check if T is an identity matrix */
216   if (T && (p == q)) {
217     isTI = PETSC_TRUE;
218     for (i=0; i<p; i++) {
219       for (j=0; j<q; j++) {
220         if (i == j) {
221           /* diagonal term must be 1 */
222           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
223         } else {
224           /* off-diagonal term must be 0 */
225           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
226         }
227       }
228     }
229   }
230   a->isTI = isTI;
231 
232   if (a->T) {
233     ierr = PetscFree(a->T);CHKERRQ(ierr);
234   }
235   if (T && (!isTI)) {
236     ierr = PetscMalloc(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
237     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
238   } else a->T = NULL;
239 
240   a->p = p;
241   a->q = q;
242   PetscFunctionReturn(0);
243 }
244 
245 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
246 {
247   PetscErrorCode ierr;
248   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
249 
250   PetscFunctionBegin;
251   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
252   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
253   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
254   ierr = PetscFree(A->data);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 PetscErrorCode MatSetUp_KAIJ(Mat A)
259 {
260   PetscErrorCode ierr;
261   PetscInt       n;
262   PetscMPIInt    size;
263   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
264 
265   PetscFunctionBegin;
266   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
267   if (size == 1) {
268     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
269     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
270     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
271     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
272     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
273     ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr);
274   } else {
275     Mat_MPIKAIJ *a;
276     Mat_MPIAIJ  *mpiaij;
277     IS          from,to;
278     Vec         gvec;
279     PetscScalar *T;
280     PetscInt    i,j;
281 
282     a = (Mat_MPIKAIJ*)A->data;
283     mpiaij = (Mat_MPIAIJ*)a->A->data;
284     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
285     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
286     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
287     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
288     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
289     ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr);
290 
291     if (a->isTI) {
292       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
293        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
294        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
295        * to pass in. */
296       ierr = PetscMalloc(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
297       for (i=0; i<a->p; i++) {
298         for (j=0; j<a->q; j++) {
299           if (i==j) T[i+j*a->p] = 1.0;
300           else      T[i+j*a->p] = 0.0;
301         }
302       }
303     } else T = a->T;
304     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
305     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
306     if (a->isTI) {
307       ierr = PetscFree(T);CHKERRQ(ierr);
308     }
309 
310     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
311     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
312     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
313     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
314     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
315 
316     /* create two temporary Index sets for build scatter gather */
317     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
318     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
319 
320     /* create temporary global vector to generate scatter context */
321     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
322 
323     /* generate the scatter context */
324     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
325 
326     ierr = ISDestroy(&from);CHKERRQ(ierr);
327     ierr = ISDestroy(&to);CHKERRQ(ierr);
328     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
329   }
330 
331   A->assembled = PETSC_TRUE;
332   PetscFunctionReturn(0);
333 }
334 
335 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
336 {
337   PetscErrorCode ierr;
338   Mat            B;
339 
340   PetscFunctionBegin;
341   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
342   ierr = MatView(B,viewer);CHKERRQ(ierr);
343   ierr = MatDestroy(&B);CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
348 {
349   PetscErrorCode ierr;
350   Mat            B;
351 
352   PetscFunctionBegin;
353   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
354   ierr = MatView(B,viewer);CHKERRQ(ierr);
355   ierr = MatDestroy(&B);CHKERRQ(ierr);
356   PetscFunctionReturn(0);
357 }
358 
359 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
360 {
361   PetscErrorCode ierr;
362   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
363 
364   PetscFunctionBegin;
365   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
366   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
367   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
368   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
369   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
370   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
371   ierr = PetscFree(A->data);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 
376 /* --------------------------------------------------------------------------------------*/
377 
378 /* zz = yy + Axx */
379 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
380 {
381   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
382   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
383   const PetscScalar *s = b->S, *t = b->T;
384   const PetscScalar *x,*v,*bx;
385   PetscScalar       *y,*sums;
386   PetscErrorCode    ierr;
387   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
388   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
389 
390   PetscFunctionBegin;
391   if (!yy) {
392     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
393   } else {
394     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
395   }
396   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
397 
398   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
399   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
400   idx  = a->j;
401   v    = a->a;
402   ii   = a->i;
403 
404   if (b->isTI) {
405     for (i=0; i<m; i++) {
406       jrow = ii[i];
407       n    = ii[i+1] - jrow;
408       sums = y + p*i;
409       for (j=0; j<n; j++) {
410         for (k=0; k<p; k++) {
411           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
412         }
413       }
414     }
415   } else if (t) {
416     for (i=0; i<m; i++) {
417       jrow = ii[i];
418       n    = ii[i+1] - jrow;
419       sums = y + p*i;
420       bx   = x + q*i;
421       for (j=0; j<n; j++) {
422         for (k=0; k<p; k++) {
423           for (l=0; l<q; l++) {
424             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
425           }
426         }
427       }
428     }
429   }
430   if (s) {
431     for (i=0; i<m; i++) {
432       jrow = ii[i];
433       n    = ii[i+1] - jrow;
434       sums = y + p*i;
435       bx   = x + q*i;
436       if (i < b->AIJ->cmap->n) {
437         for (j=0; j<q; j++) {
438           for (k=0; k<p; k++) {
439             sums[k] += s[k+j*p]*bx[j];
440           }
441         }
442       }
443     }
444   }
445 
446   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
447   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
448   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
453 {
454   PetscErrorCode ierr;
455   PetscFunctionBegin;
456   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
461 {
462   PetscErrorCode ierr;
463   PetscFunctionBegin;
464   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
465   PetscFunctionReturn(0);
466 }
467 
468 #include <petsc/private/kernels/blockinvert.h>
469 
470 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
471 {
472   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
473   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
474   const PetscScalar *S  = b->S;
475   const PetscScalar *T  = b->T;
476   const PetscScalar *v  = a->a;
477   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
478   PetscErrorCode    ierr;
479   PetscInt          i,j,*v_pivots,dof,dof2;
480   PetscScalar       *diag,aval,*v_work;
481 
482   PetscFunctionBegin;
483   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
484   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
485 
486   dof  = p;
487   dof2 = dof*dof;
488 
489   if (b->ibdiagvalid) {
490     if (values) *values = b->ibdiag;
491     PetscFunctionReturn(0);
492   }
493   if (!b->ibdiag) {
494     ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
495     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
496   }
497   if (values) *values = b->ibdiag;
498   diag = b->ibdiag;
499 
500   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
501   for (i=0; i<m; i++) {
502     if (S) {
503       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
504     } else {
505       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
506     }
507     if (b->isTI) {
508       aval = 0;
509       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
510       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
511     } else if (T) {
512       aval = 0;
513       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
514       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
515     }
516     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
517     diag += dof2;
518   }
519   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
520 
521   b->ibdiagvalid = PETSC_TRUE;
522   PetscFunctionReturn(0);
523 }
524 
525 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
526 {
527   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
528 
529   PetscFunctionBegin;
530   *B = kaij->AIJ;
531   PetscFunctionReturn(0);
532 }
533 
534 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
535 {
536   PetscErrorCode    ierr;
537   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
538   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
539   const PetscScalar *aa = a->a, *T = kaij->T, *v;
540   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
541   const PetscScalar *b, *xb, *idiag;
542   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
543   PetscInt          i, j, k, i2, bs, bs2, nz;
544 
545   PetscFunctionBegin;
546   its = its*lits;
547   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
548   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
549   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
550   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
551   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
552   else        {bs = p; bs2 = bs*bs; }
553 
554   if (!m) PetscFunctionReturn(0);
555 
556   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
557   idiag = kaij->ibdiag;
558   diag  = a->diag;
559 
560   if (!kaij->sor.setup) {
561     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
562     kaij->sor.setup = PETSC_TRUE;
563   }
564   y     = kaij->sor.y;
565   w     = kaij->sor.w;
566   work  = kaij->sor.work;
567   t     = kaij->sor.t;
568   arr   = kaij->sor.arr;
569 
570   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
571   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
572 
573   if (flag & SOR_ZERO_INITIAL_GUESS) {
574     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
576       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
577       i2     =  bs;
578       idiag  += bs2;
579       for (i=1; i<m; i++) {
580         v  = aa + ai[i];
581         vi = aj + ai[i];
582         nz = diag[i] - ai[i];
583 
584         if (T) {                /* b - T (Arow * x) */
585           for (k=0; k<bs; k++) w[k] = 0;
586           for (j=0; j<nz; j++) {
587             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
588           }
589           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
590           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
591         } else if (kaij->isTI) {
592           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
593           for (j=0; j<nz; j++) {
594             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
595           }
596         } else {
597           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
598         }
599 
600         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
601         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
602 
603         idiag += bs2;
604         i2    += bs;
605       }
606       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
607       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
608       xb = t;
609     } else xb = b;
610     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
611       idiag = kaij->ibdiag+bs2*(m-1);
612       i2    = bs * (m-1);
613       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
614       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
615       i2    -= bs;
616       idiag -= bs2;
617       for (i=m-2; i>=0; i--) {
618         v  = aa + diag[i] + 1 ;
619         vi = aj + diag[i] + 1;
620         nz = ai[i+1] - diag[i] - 1;
621 
622         if (T) {                /* FIXME: This branch untested */
623           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
624           /* copy all rows of x that are needed into contiguous space */
625           workt = work;
626           for (j=0; j<nz; j++) {
627             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
628             workt += bs;
629           }
630           arrt = arr;
631           for (j=0; j<nz; j++) {
632             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
633             for (k=0; k<bs2; k++) arrt[k] *= v[j];
634             arrt += bs2;
635           }
636           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
637         } else if (kaij->isTI) {
638           for (k=0; k<bs; k++) w[k] = t[i2+k];
639           for (j=0; j<nz; j++) {
640             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
641           }
642         }
643 
644         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
645         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
646 
647         idiag -= bs2;
648         i2    -= bs;
649       }
650       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
651     }
652     its--;
653   }
654   while (its--) {               /* FIXME: This branch not updated */
655     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
656       i2     =  0;
657       idiag  = kaij->ibdiag;
658       for (i=0; i<m; i++) {
659         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
660 
661         v  = aa + ai[i];
662         vi = aj + ai[i];
663         nz = diag[i] - ai[i];
664         workt = work;
665         for (j=0; j<nz; j++) {
666           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
667           workt += bs;
668         }
669         arrt = arr;
670         if (T) {
671           for (j=0; j<nz; j++) {
672             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
673             for (k=0; k<bs2; k++) arrt[k] *= v[j];
674             arrt += bs2;
675           }
676           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
677         } else if (kaij->isTI) {
678           for (j=0; j<nz; j++) {
679             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
680             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
681             arrt += bs2;
682           }
683           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
684         }
685         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
686 
687         v  = aa + diag[i] + 1;
688         vi = aj + diag[i] + 1;
689         nz = ai[i+1] - diag[i] - 1;
690         workt = work;
691         for (j=0; j<nz; j++) {
692           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
693           workt += bs;
694         }
695         arrt = arr;
696         if (T) {
697           for (j=0; j<nz; j++) {
698             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
699             for (k=0; k<bs2; k++) arrt[k] *= v[j];
700             arrt += bs2;
701           }
702           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
703         } else if (kaij->isTI) {
704           for (j=0; j<nz; j++) {
705             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
706             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
707             arrt += bs2;
708           }
709           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
710         }
711 
712         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
713         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
714 
715         idiag += bs2;
716         i2    += bs;
717       }
718       xb = t;
719     }
720     else xb = b;
721     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722       idiag = kaij->ibdiag+bs2*(m-1);
723       i2    = bs * (m-1);
724       if (xb == b) {
725         for (i=m-1; i>=0; i--) {
726           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
727 
728           v  = aa + ai[i];
729           vi = aj + ai[i];
730           nz = diag[i] - ai[i];
731           workt = work;
732           for (j=0; j<nz; j++) {
733             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
734             workt += bs;
735           }
736           arrt = arr;
737           if (T) {
738             for (j=0; j<nz; j++) {
739               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
740               for (k=0; k<bs2; k++) arrt[k] *= v[j];
741               arrt += bs2;
742             }
743             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
744           } else if (kaij->isTI) {
745             for (j=0; j<nz; j++) {
746               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
747               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
748               arrt += bs2;
749             }
750             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
751           }
752 
753           v  = aa + diag[i] + 1;
754           vi = aj + diag[i] + 1;
755           nz = ai[i+1] - diag[i] - 1;
756           workt = work;
757           for (j=0; j<nz; j++) {
758             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
759             workt += bs;
760           }
761           arrt = arr;
762           if (T) {
763             for (j=0; j<nz; j++) {
764               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
765               for (k=0; k<bs2; k++) arrt[k] *= v[j];
766               arrt += bs2;
767             }
768             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
769           } else if (kaij->isTI) {
770             for (j=0; j<nz; j++) {
771               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
772               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
773               arrt += bs2;
774             }
775             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
776           }
777 
778           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
779           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
780         }
781       } else {
782         for (i=m-1; i>=0; i--) {
783           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
784           v  = aa + diag[i] + 1;
785           vi = aj + diag[i] + 1;
786           nz = ai[i+1] - diag[i] - 1;
787           workt = work;
788           for (j=0; j<nz; j++) {
789             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
790             workt += bs;
791           }
792           arrt = arr;
793           if (T) {
794             for (j=0; j<nz; j++) {
795               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
796               for (k=0; k<bs2; k++) arrt[k] *= v[j];
797               arrt += bs2;
798             }
799             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
800           } else if (kaij->isTI) {
801             for (j=0; j<nz; j++) {
802               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
803               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
804               arrt += bs2;
805             }
806             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
807           }
808           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
809           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
810         }
811         idiag -= bs2;
812         i2    -= bs;
813       }
814       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
815     }
816   }
817 
818   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
819   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
820   PetscFunctionReturn(0);
821 }
822 
823 /*===================================================================================*/
824 
825 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
826 {
827   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
828   PetscErrorCode ierr;
829 
830   PetscFunctionBegin;
831   if (!yy) {
832     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
833   } else {
834     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
835   }
836   /* start the scatter */
837   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
838   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
839   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
840   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
841   PetscFunctionReturn(0);
842 }
843 
844 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
845 {
846   PetscErrorCode ierr;
847   PetscFunctionBegin;
848   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
849   PetscFunctionReturn(0);
850 }
851 
852 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
853 {
854   PetscErrorCode ierr;
855   PetscFunctionBegin;
856   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
861 {
862   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
863   PetscErrorCode  ierr;
864 
865   PetscFunctionBegin;
866   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 /* ----------------------------------------------------------------*/
871 
872 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
873 {
874   Mat_SeqKAIJ     *b    = (Mat_SeqKAIJ*) A->data;
875   PetscErrorCode  ierr,diag;
876   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
877   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
878 
879   PetscFunctionBegin;
880   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
881   b->getrowactive = PETSC_TRUE;
882   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
883 
884   if ((!S) && (!T) && (!b->isTI)) {
885     if (ncols)    *ncols  = 0;
886     if (cols)     *cols   = NULL;
887     if (values)   *values = NULL;
888     PetscFunctionReturn(0);
889   }
890 
891   if (T || b->isTI) {
892     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
893     diag  = PETSC_FALSE;
894     c     = nzaij;
895     for (i=0; i<nzaij; i++) {
896       /* check if this row contains a diagonal entry */
897       if (colsaij[i] == r) {
898         diag = PETSC_TRUE;
899         c = i;
900       }
901     }
902   } else nzaij = c = 0;
903 
904   /* calculate size of row */
905   nz = 0;
906   if (S)            nz += q;
907   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
908 
909   if (cols || values) {
910     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
911     if (b->isTI) {
912       for (i=0; i<nzaij; i++) {
913         for (j=0; j<q; j++) {
914           idx[i*q+j] = colsaij[i]*q+j;
915           v[i*q+j]   = (j==s ? vaij[i] : 0);
916         }
917       }
918     } else if (T) {
919       for (i=0; i<nzaij; i++) {
920         for (j=0; j<q; j++) {
921           idx[i*q+j] = colsaij[i]*q+j;
922           v[i*q+j]   = vaij[i]*T[s+j*p];
923         }
924       }
925     }
926     if (S) {
927       for (j=0; j<q; j++) {
928         idx[c*q+j] = r*q+j;
929         v[c*q+j]  += S[s+j*p];
930       }
931     }
932   }
933 
934   if (ncols)    *ncols  = nz;
935   if (cols)     *cols   = idx;
936   if (values)   *values = v;
937 
938   PetscFunctionReturn(0);
939 }
940 
941 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
942 {
943   PetscErrorCode ierr;
944   PetscFunctionBegin;
945   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
946   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
947   PetscFunctionReturn(0);
948 }
949 
950 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
951 {
952   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
953   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
954   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
955   Mat             AIJ     = b->A;
956   PetscBool       diag    = PETSC_FALSE;
957   PetscErrorCode  ierr;
958   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
959   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
960   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
961 
962   PetscFunctionBegin;
963   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
964   b->getrowactive = PETSC_TRUE;
965   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
966   lrow = row - rstart;
967 
968   if ((!S) && (!T) && (!b->isTI)) {
969     if (ncols)    *ncols  = 0;
970     if (cols)     *cols   = NULL;
971     if (values)   *values = NULL;
972     PetscFunctionReturn(0);
973   }
974 
975   r = lrow/p;
976   s = lrow%p;
977 
978   if (T || b->isTI) {
979     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
980     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
981     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
982 
983     c     = ncolsaij + ncolsoaij;
984     for (i=0; i<ncolsaij; i++) {
985       /* check if this row contains a diagonal entry */
986       if (colsaij[i] == r) {
987         diag = PETSC_TRUE;
988         c = i;
989       }
990     }
991   } else c = 0;
992 
993   /* calculate size of row */
994   nz = 0;
995   if (S)            nz += q;
996   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
997 
998   if (cols || values) {
999     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1000     if (b->isTI) {
1001       for (i=0; i<ncolsaij; i++) {
1002         for (j=0; j<q; j++) {
1003           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1004           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1005         }
1006       }
1007       for (i=0; i<ncolsoaij; i++) {
1008         for (j=0; j<q; j++) {
1009           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1010           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1011         }
1012       }
1013     } else if (T) {
1014       for (i=0; i<ncolsaij; i++) {
1015         for (j=0; j<q; j++) {
1016           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1017           v[i*q+j]   = vals[i]*T[s+j*p];
1018         }
1019       }
1020       for (i=0; i<ncolsoaij; i++) {
1021         for (j=0; j<q; j++) {
1022           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1023           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1024         }
1025       }
1026     }
1027     if (S) {
1028       for (j=0; j<q; j++) {
1029         idx[c*q+j] = (r+rstart/p)*q+j;
1030         v[c*q+j]  += S[s+j*p];
1031       }
1032     }
1033   }
1034 
1035   if (ncols)  *ncols  = nz;
1036   if (cols)   *cols   = idx;
1037   if (values) *values = v;
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1042 {
1043   PetscErrorCode ierr;
1044   PetscFunctionBegin;
1045   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1046   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1051 {
1052   PetscErrorCode ierr;
1053   Mat            A;
1054 
1055   PetscFunctionBegin;
1056   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1057   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1058   ierr = MatDestroy(&A);CHKERRQ(ierr);
1059   PetscFunctionReturn(0);
1060 }
1061 
1062 /* ---------------------------------------------------------------------------------- */
1063 /*@C
1064   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1065 
1066     [I \otimes S + A \otimes T]
1067 
1068   where
1069     S is a dense (p \times q) matrix
1070     T is a dense (p \times q) matrix
1071     A is an AIJ  (n \times n) matrix
1072     I is the identity matrix
1073   The resulting matrix is (np \times nq)
1074 
1075   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1076   S is always stored independently on all processes as a PetscScalar array in column-major format.
1077 
1078   Collective
1079 
1080   Input Parameters:
1081 + A - the AIJ matrix
1082 . S - the S matrix, stored as a PetscScalar array (column-major)
1083 . T - the T matrix, stored as a PetscScalar array (column-major)
1084 . p - number of rows in S and T
1085 - q - number of columns in S and T
1086 
1087   Output Parameter:
1088 . kaij - the new KAIJ matrix
1089 
1090   Operations provided:
1091 + MatMult
1092 . MatMultAdd
1093 . MatInvertBlockDiagonal
1094 - MatView
1095 
1096   Level: advanced
1097 
1098 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1099 @*/
1100 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1101 {
1102   PetscErrorCode ierr;
1103   PetscMPIInt    size;
1104 
1105   PetscFunctionBegin;
1106   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1107   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1108   if (size == 1) {
1109     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1110   } else {
1111     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1112   }
1113   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1114   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1115   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1116   ierr = MatSetUp(*kaij);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 /*MC
1121   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1122 
1123     [I \otimes S + A \otimes T]
1124 
1125   where
1126     S is a dense (p \times q) matrix
1127     T is a dense (p \times q) matrix
1128     A is an AIJ  (n \times n) matrix
1129     I is the identity matrix
1130   The resulting matrix is (np \times nq)
1131 
1132   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1133   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
1134 
1135   Operations provided:
1136 + MatMult
1137 . MatMultAdd
1138 . MatInvertBlockDiagonal
1139 - MatView
1140 
1141   Level: advanced
1142 
1143 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1144 M*/
1145 
1146 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1147 {
1148   PetscErrorCode ierr;
1149   Mat_MPIKAIJ    *b;
1150   PetscMPIInt    size;
1151 
1152   PetscFunctionBegin;
1153   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1154   A->data  = (void*)b;
1155 
1156   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1157 
1158   A->ops->setup = MatSetUp_KAIJ;
1159 
1160   b->w    = 0;
1161   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1162   if (size == 1) {
1163     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1164 
1165     A->ops->setup               = MatSetUp_KAIJ;
1166     A->ops->destroy             = MatDestroy_SeqKAIJ;
1167     A->ops->view                = MatView_SeqKAIJ;
1168     A->ops->mult                = MatMult_SeqKAIJ_N;
1169     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1170     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1171     A->ops->getrow              = MatGetRow_SeqKAIJ;
1172     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1173     A->ops->sor                 = MatSOR_SeqKAIJ;
1174   } else {
1175     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1176     A->ops->setup               = MatSetUp_KAIJ;
1177     A->ops->destroy             = MatDestroy_MPIKAIJ;
1178     A->ops->view                = MatView_MPIKAIJ;
1179     A->ops->mult                = MatMult_MPIKAIJ_dof;
1180     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1181     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1182     A->ops->getrow              = MatGetRow_MPIKAIJ;
1183     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1184     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1185   }
1186   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1187   PetscFunctionReturn(0);
1188 }
1189