xref: /petsc/src/mat/impls/kaij/kaij.c (revision 38822f9d8c015411d8ef056d1ffc8efe9fa609f3)
1 
2 /*
3   Defines the basic matrix operations for the KAIJ  matrix storage format.
4   This format is used to evaluate matrices of the form:
5 
6     [I \otimes S + A \otimes T]
7 
8   where
9     S is a dense (p \times q) matrix
10     T is a dense (p \times q) matrix
11     A is an AIJ  (n \times n) matrix
12     I is the identity matrix
13 
14   The resulting matrix is (np \times nq)
15 
16   We provide:
17      MatMult()
18      MatMultAdd()
19      MatInvertBlockDiagonal()
20   and
21      MatCreateKAIJ(Mat,PetscInt,PetscInt,const PetscScalar[],const PetscScalar[],Mat*)
22 
23   This single directory handles both the sequential and parallel codes
24 */
25 
26 #include <../src/mat/impls/kaij/kaij.h> /*I "petscmat.h" I*/
27 #include <../src/mat/utils/freespace.h>
28 #include <petsc/private/vecimpl.h>
29 
30 /*@C
31    MatKAIJGetAIJ - Get the AIJ matrix describing the blockwise action of the KAIJ matrix
32 
33    Not Collective, but if the KAIJ matrix is parallel, the AIJ matrix is also parallel
34 
35    Input Parameter:
36 .  A - the KAIJ matrix
37 
38    Output Parameter:
39 .  B - the AIJ matrix
40 
41    Level: advanced
42 
43    Notes: The reference count on the AIJ matrix is not increased so you should not destroy it.
44 
45 .seealso: MatCreateKAIJ()
46 @*/
47 PetscErrorCode  MatKAIJGetAIJ(Mat A,Mat *B)
48 {
49   PetscErrorCode ierr;
50   PetscBool      ismpikaij,isseqkaij;
51 
52   PetscFunctionBegin;
53   ierr = PetscObjectTypeCompare((PetscObject)A,MATMPIKAIJ,&ismpikaij);CHKERRQ(ierr);
54   ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQKAIJ,&isseqkaij);CHKERRQ(ierr);
55   if (ismpikaij) {
56     Mat_MPIKAIJ *b = (Mat_MPIKAIJ*)A->data;
57 
58     *B = b->A;
59   } else if (isseqkaij) {
60     Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
61 
62     *B = b->AIJ;
63   } else {
64     *B = A;
65   }
66   PetscFunctionReturn(0);
67 }
68 
69 /*@C
70    MatKAIJGetS - Get the S matrix describing the shift action of the KAIJ matrix
71 
72    Not Collective; the entire S is stored and returned independently on all processes.
73 
74    Input Parameter:
75 .  A - the KAIJ matrix
76 
77    Output Parameter:
78 .  S - the S matrix, in form of a scalar array in column-major format
79 
80    Notes:
81    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
82    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in S.
83 
84    Level: advanced
85 
86 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
87 @*/
88 PetscErrorCode  MatKAIJGetS(Mat A,const PetscScalar **S)
89 {
90   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
91   PetscFunctionBegin;
92   *S = b->S;
93   PetscFunctionReturn(0);
94 }
95 
96 /*@C
97    MatKAIJGetT - Get the transformation matrix T associated with the KAIJ matrix
98 
99    Not Collective; the entire T is stored and returned independently on all processes
100 
101    Input Parameter:
102 .  A - the KAIJ matrix
103 
104    Output Parameter:
105 .  T - the T matrix, in form of a scalar array in column-major format
106 
107    Notes:
108    The dimensions of the output array can be determined by calling MatGetBlockSizes() on the KAIJ matrix.
109    The row block and column block sizes returned will correspond to the number of rows and columns, respectively, in T.
110 
111    Level: advanced
112 
113 .seealso: MatCreateKAIJ(), MatGetBlockSizes()
114 @*/
115 PetscErrorCode  MatKAIJGetT(Mat A,const PetscScalar **T)
116 {
117   Mat_SeqKAIJ *b = (Mat_SeqKAIJ*)A->data;
118   PetscFunctionBegin;
119   *T = b->T;
120   PetscFunctionReturn(0);
121 }
122 
123 /*@
124    MatKAIJSetAIJ - Set the AIJ matrix describing the blockwise action of the KAIJ matrix
125 
126    Logically Collective; if the AIJ matrix is parallel, the KAIJ matrix is also parallel
127 
128    Input Parameters:
129 +  A - the KAIJ matrix
130 -  B - the AIJ matrix
131 
132    Level: advanced
133 
134 .seealso: MatKAIJGetAIJ(), MatKAIJSetS(), MatKAIJSetT()
135 @*/
136 PetscErrorCode MatKAIJSetAIJ(Mat A,Mat B)
137 {
138   PetscErrorCode ierr;
139   PetscMPIInt    size;
140 
141   PetscFunctionBegin;
142   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
143   if (size == 1) {
144     Mat_SeqKAIJ *a = (Mat_SeqKAIJ*)A->data;
145     a->AIJ = B;
146   } else {
147     Mat_MPIKAIJ *a = (Mat_MPIKAIJ*)A->data;
148     a->A = B;
149   }
150   PetscFunctionReturn(0);
151 }
152 
153 /*@C
154    MatKAIJSetS - Set the S matrix describing the shift action of the KAIJ matrix
155 
156    Logically Collective; the entire S is stored independently on all processes.
157 
158    Input Parameters:
159 +  A - the KAIJ matrix
160 .  p - the number of rows in S
161 .  q - the number of columns in S
162 -  S - the S matrix, in form of a scalar array in column-major format
163 
164    Notes: The dimensions p and q must match those of the transformation matrix T associated with the KAIJ matrix.
165 
166    Level: Advanced
167 
168 .seealso: MatKAIJGetS(), MatKAIJSetT(), MatKAIJSetAIJ()
169 @*/
170 PetscErrorCode MatKAIJSetS(Mat A,PetscInt p,PetscInt q,const PetscScalar S[])
171 {
172   PetscErrorCode ierr;
173   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
174 
175   PetscFunctionBegin;
176   if (a->S) {
177     ierr = PetscFree(a->S);CHKERRQ(ierr);
178   }
179   if (S) {
180     ierr = PetscMalloc(p*q*sizeof(PetscScalar),&a->S);CHKERRQ(ierr);
181     ierr = PetscMemcpy(a->S,S,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
182   } else  a->S = NULL;
183 
184   a->p = p;
185   a->q = q;
186 
187   PetscFunctionReturn(0);
188 }
189 
190 /*@C
191    MatKAIJSetT - Set the transformation matrix T associated with the KAIJ matrix
192 
193    Logically Collective; the entire T is stored independently on all processes.
194 
195    Input Parameters:
196 +  A - the KAIJ matrix
197 .  p - the number of rows in S
198 .  q - the number of columns in S
199 -  T - the T matrix, in form of a scalar array in column-major format
200 
201    Notes: The dimensions p and q must match those of the shift matrix S associated with the KAIJ matrix.
202 
203    Level: Advanced
204 
205 .seealso: MatKAIJGetT(), MatKAIJSetS(), MatKAIJSetAIJ()
206 @*/
207 PetscErrorCode MatKAIJSetT(Mat A,PetscInt p,PetscInt q,const PetscScalar T[])
208 {
209   PetscErrorCode ierr;
210   PetscInt       i,j;
211   Mat_SeqKAIJ    *a = (Mat_SeqKAIJ*)A->data;
212   PetscBool      isTI = PETSC_FALSE;
213 
214   PetscFunctionBegin;
215   /* check if T is an identity matrix */
216   if (T && (p == q)) {
217     isTI = PETSC_TRUE;
218     for (i=0; i<p; i++) {
219       for (j=0; j<q; j++) {
220         if (i == j) {
221           /* diagonal term must be 1 */
222           if (T[i+j*p] != 1.0) isTI = PETSC_FALSE;
223         } else {
224           /* off-diagonal term must be 0 */
225           if (T[i+j*p] != 0.0) isTI = PETSC_FALSE;
226         }
227       }
228     }
229   }
230   a->isTI = isTI;
231 
232   if (a->T) {
233     ierr = PetscFree(a->T);CHKERRQ(ierr);
234   }
235   if (T && (!isTI)) {
236     ierr = PetscMalloc(p*q*sizeof(PetscScalar),&a->T);CHKERRQ(ierr);
237     ierr = PetscMemcpy(a->T,T,p*q*sizeof(PetscScalar));CHKERRQ(ierr);
238   } else a->T = NULL;
239 
240   a->p = p;
241   a->q = q;
242   PetscFunctionReturn(0);
243 }
244 
245 PetscErrorCode MatDestroy_SeqKAIJ(Mat A)
246 {
247   PetscErrorCode ierr;
248   Mat_SeqKAIJ    *b = (Mat_SeqKAIJ*)A->data;
249 
250   PetscFunctionBegin;
251   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
252   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
253   ierr = PetscFree5(b->sor.w,b->sor.y,b->sor.work,b->sor.t,b->sor.arr);CHKERRQ(ierr);
254   ierr = PetscFree(A->data);CHKERRQ(ierr);
255   PetscFunctionReturn(0);
256 }
257 
258 PetscErrorCode MatSetUp_KAIJ(Mat A)
259 {
260   PetscErrorCode ierr;
261   PetscInt       n;
262   PetscMPIInt    size;
263   Mat_SeqKAIJ    *seqkaij = (Mat_SeqKAIJ*)A->data;
264 
265   PetscFunctionBegin;
266   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
267   if (size == 1) {
268     ierr = MatSetSizes(A,seqkaij->p*seqkaij->AIJ->rmap->n,seqkaij->q*seqkaij->AIJ->cmap->n,seqkaij->p*seqkaij->AIJ->rmap->N,seqkaij->q*seqkaij->AIJ->cmap->N);CHKERRQ(ierr);
269     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
270     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
271     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
272     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
273     ierr = PetscObjectReference((PetscObject)seqkaij->AIJ);CHKERRQ(ierr);
274   } else {
275     Mat_MPIKAIJ *a;
276     Mat_MPIAIJ  *mpiaij;
277     IS          from,to;
278     Vec         gvec;
279     PetscScalar *T;
280     PetscInt    i,j;
281 
282     a = (Mat_MPIKAIJ*)A->data;
283     mpiaij = (Mat_MPIAIJ*)a->A->data;
284     ierr = MatSetSizes(A,a->p*a->A->rmap->n,a->q*a->A->cmap->n,a->p*a->A->rmap->N,a->q*a->A->cmap->N);CHKERRQ(ierr);
285     ierr = PetscLayoutSetBlockSize(A->rmap,seqkaij->p);CHKERRQ(ierr);
286     ierr = PetscLayoutSetBlockSize(A->cmap,seqkaij->q);CHKERRQ(ierr);
287     ierr = PetscLayoutSetUp(A->rmap);CHKERRQ(ierr);
288     ierr = PetscLayoutSetUp(A->cmap);CHKERRQ(ierr);
289     ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr);
290 
291     if (a->isTI) {
292       /* If the transformation matrix associated with the parallel matrix A is the identity matrix, then a->T will be NULL.
293        * In this case, if we pass a->T directly to the MatCreateKAIJ() calls to create the sequential submatrices, the routine will
294        * not be able to tell that transformation matrix should be set to the identity; thus we create a temporary identity matrix
295        * to pass in. */
296       ierr = PetscMalloc(a->p*a->q*sizeof(PetscScalar),&T);CHKERRQ(ierr);
297       for (i=0; i<a->p; i++) {
298         for (j=0; j<a->q; j++) {
299           if (i==j) T[i+j*a->p] = 1.0;
300           else      T[i+j*a->p] = 0.0;
301         }
302       }
303     } else T = a->T;
304     ierr = MatCreateKAIJ(mpiaij->A,a->p,a->q,a->S,T,&a->AIJ);CHKERRQ(ierr);
305     ierr = MatCreateKAIJ(mpiaij->B,a->p,a->q,NULL,T,&a->OAIJ);CHKERRQ(ierr);
306     if (a->isTI) {
307       ierr = PetscFree(T);CHKERRQ(ierr);
308     }
309 
310     ierr = VecGetSize(mpiaij->lvec,&n);CHKERRQ(ierr);
311     ierr = VecCreate(PETSC_COMM_SELF,&a->w);CHKERRQ(ierr);
312     ierr = VecSetSizes(a->w,n*a->q,n*a->q);CHKERRQ(ierr);
313     ierr = VecSetBlockSize(a->w,a->q);CHKERRQ(ierr);
314     ierr = VecSetType(a->w,VECSEQ);CHKERRQ(ierr);
315 
316     /* create two temporary Index sets for build scatter gather */
317     ierr = ISCreateBlock(PetscObjectComm((PetscObject)a->A),a->q,n,mpiaij->garray,PETSC_COPY_VALUES,&from);CHKERRQ(ierr);
318     ierr = ISCreateStride(PETSC_COMM_SELF,n*a->q,0,1,&to);CHKERRQ(ierr);
319 
320     /* create temporary global vector to generate scatter context */
321     ierr = VecCreateMPIWithArray(PetscObjectComm((PetscObject)a->A),a->q,a->q*a->A->cmap->n,a->q*a->A->cmap->N,NULL,&gvec);CHKERRQ(ierr);
322 
323     /* generate the scatter context */
324     ierr = VecScatterCreate(gvec,from,a->w,to,&a->ctx);CHKERRQ(ierr);
325 
326     ierr = ISDestroy(&from);CHKERRQ(ierr);
327     ierr = ISDestroy(&to);CHKERRQ(ierr);
328     ierr = VecDestroy(&gvec);CHKERRQ(ierr);
329   }
330 
331   A->assembled = PETSC_TRUE;
332   PetscFunctionReturn(0);
333 }
334 
335 PetscErrorCode MatView_SeqKAIJ(Mat A,PetscViewer viewer)
336 {
337   PetscErrorCode ierr;
338   Mat            B;
339 
340   PetscFunctionBegin;
341   ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
342   ierr = MatView(B,viewer);CHKERRQ(ierr);
343   ierr = MatDestroy(&B);CHKERRQ(ierr);
344   PetscFunctionReturn(0);
345 }
346 
347 PetscErrorCode MatView_MPIKAIJ(Mat A,PetscViewer viewer)
348 {
349   PetscErrorCode ierr;
350   Mat            B;
351 
352   PetscFunctionBegin;
353   ierr = MatConvert(A,MATMPIAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr);
354   ierr = MatView(B,viewer);CHKERRQ(ierr);
355   ierr = MatDestroy(&B);CHKERRQ(ierr);
356   PetscFunctionReturn(0);
357 }
358 
359 PetscErrorCode MatDestroy_MPIKAIJ(Mat A)
360 {
361   PetscErrorCode ierr;
362   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
363 
364   PetscFunctionBegin;
365   ierr = MatDestroy(&b->AIJ);CHKERRQ(ierr);
366   ierr = MatDestroy(&b->OAIJ);CHKERRQ(ierr);
367   ierr = MatDestroy(&b->A);CHKERRQ(ierr);
368   ierr = VecScatterDestroy(&b->ctx);CHKERRQ(ierr);
369   ierr = VecDestroy(&b->w);CHKERRQ(ierr);
370   ierr = PetscFree3(b->S,b->T,b->ibdiag);CHKERRQ(ierr);
371   ierr = PetscFree(A->data);CHKERRQ(ierr);
372   PetscFunctionReturn(0);
373 }
374 
375 
376 /* --------------------------------------------------------------------------------------*/
377 
378 /* zz = yy + Axx */
379 PetscErrorCode KAIJMultAdd_Seq(Mat A,Vec xx,Vec yy,Vec zz)
380 {
381   Mat_SeqKAIJ       *b = (Mat_SeqKAIJ*)A->data;
382   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)b->AIJ->data;
383   const PetscScalar *s = b->S, *t = b->T;
384   const PetscScalar *x,*v,*bx;
385   PetscScalar       *y,*sums;
386   PetscErrorCode    ierr;
387   const PetscInt    m = b->AIJ->rmap->n,*idx,*ii;
388   PetscInt          n,i,jrow,j,l,p=b->p,q=b->q,k;
389 
390   PetscFunctionBegin;
391   if (!yy) {
392     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
393   } else {
394     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
395   }
396   if ((!s) && (!t) && (!b->isTI)) PetscFunctionReturn(0);
397 
398   ierr = VecGetArrayRead(xx,&x);CHKERRQ(ierr);
399   ierr = VecGetArray(zz,&y);CHKERRQ(ierr);
400   idx  = a->j;
401   v    = a->a;
402   ii   = a->i;
403 
404   if (b->isTI) {
405     for (i=0; i<m; i++) {
406       jrow = ii[i];
407       n    = ii[i+1] - jrow;
408       sums = y + p*i;
409       for (j=0; j<n; j++) {
410         for (k=0; k<p; k++) {
411           sums[k] += v[jrow+j]*x[q*idx[jrow+j]+k];
412         }
413       }
414     }
415   } else if (t) {
416     for (i=0; i<m; i++) {
417       jrow = ii[i];
418       n    = ii[i+1] - jrow;
419       sums = y + p*i;
420       bx   = x + q*i;
421       for (j=0; j<n; j++) {
422         for (k=0; k<p; k++) {
423           for (l=0; l<q; l++) {
424             sums[k] += v[jrow+j]*t[k+l*p]*x[q*idx[jrow+j]+l];
425           }
426         }
427       }
428     }
429   }
430   if (s) {
431     for (i=0; i<m; i++) {
432       jrow = ii[i];
433       n    = ii[i+1] - jrow;
434       sums = y + p*i;
435       bx   = x + q*i;
436       if (i < b->AIJ->cmap->n) {
437         for (j=0; j<q; j++) {
438           for (k=0; k<p; k++) {
439             sums[k] += s[k+j*p]*bx[j];
440           }
441         }
442       }
443     }
444   }
445 
446   ierr = PetscLogFlops((2.0*p*q-p)*m+2*p*a->nz);CHKERRQ(ierr);
447   ierr = VecRestoreArrayRead(xx,&x);CHKERRQ(ierr);
448   ierr = VecRestoreArray(zz,&y);CHKERRQ(ierr);
449   PetscFunctionReturn(0);
450 }
451 
452 PetscErrorCode MatMult_SeqKAIJ_N(Mat A,Vec xx,Vec yy)
453 {
454   PetscErrorCode ierr;
455   PetscFunctionBegin;
456   ierr = KAIJMultAdd_Seq(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
457   PetscFunctionReturn(0);
458 }
459 
460 PetscErrorCode MatMultAdd_SeqKAIJ_N(Mat A,Vec xx,Vec yy,Vec zz)
461 {
462   PetscErrorCode ierr;
463   PetscFunctionBegin;
464   ierr = KAIJMultAdd_Seq(A,xx,yy,zz);CHKERRQ(ierr);
465   PetscFunctionReturn(0);
466 }
467 
468 #include <petsc/private/kernels/blockinvert.h>
469 
470 PetscErrorCode MatInvertBlockDiagonal_SeqKAIJ_N(Mat A,const PetscScalar **values)
471 {
472   Mat_SeqKAIJ       *b  = (Mat_SeqKAIJ*)A->data;
473   Mat_SeqAIJ        *a  = (Mat_SeqAIJ*)b->AIJ->data;
474   const PetscScalar *S  = b->S;
475   const PetscScalar *T  = b->T;
476   const PetscScalar *v  = a->a;
477   const PetscInt     p  = b->p, q = b->q, m = b->AIJ->rmap->n, *idx = a->j, *ii = a->i;
478   PetscErrorCode    ierr;
479   PetscInt          i,j,*v_pivots,dof,dof2;
480   PetscScalar       *diag,aval,*v_work;
481 
482   PetscFunctionBegin;
483   if (p != q) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Block size must be square to calculate inverse.");
484   if ((!S) && (!T) && (!b->isTI)) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"MATKAIJ: Cannot invert a zero matrix.");
485 
486   dof  = p;
487   dof2 = dof*dof;
488 
489   if (b->ibdiagvalid) {
490     if (values) *values = b->ibdiag;
491     PetscFunctionReturn(0);
492   }
493   if (!b->ibdiag) {
494     ierr = PetscMalloc(dof2*m*sizeof(PetscScalar),&b->ibdiag);CHKERRQ(ierr);
495     ierr = PetscLogObjectMemory((PetscObject)A,dof2*m*sizeof(PetscScalar));CHKERRQ(ierr);
496   }
497   if (values) *values = b->ibdiag;
498   diag = b->ibdiag;
499 
500   ierr = PetscMalloc2(dof,&v_work,dof,&v_pivots);CHKERRQ(ierr);
501   for (i=0; i<m; i++) {
502     if (S) {
503       ierr = PetscMemcpy(diag,S,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
504     } else {
505       ierr = PetscMemzero(diag,dof2*sizeof(PetscScalar));CHKERRQ(ierr);
506     }
507     if (b->isTI) {
508       aval = 0;
509       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
510       for (j=0; j<dof; j++) diag[j+dof*j] += aval;
511     } else if (T) {
512       aval = 0;
513       for (j=ii[i]; j<ii[i+1]; j++) if (idx[j] == i) aval = v[j];
514       for (j=0; j<dof2; j++) diag[j] += aval*T[j];
515     }
516     ierr = PetscKernel_A_gets_inverse_A(dof,diag,v_pivots,v_work,PETSC_FALSE,NULL);CHKERRQ(ierr);
517     diag += dof2;
518   }
519   ierr = PetscFree2(v_work,v_pivots);CHKERRQ(ierr);
520 
521   b->ibdiagvalid = PETSC_TRUE;
522   PetscFunctionReturn(0);
523 }
524 
525 static PetscErrorCode MatGetDiagonalBlock_MPIKAIJ(Mat A,Mat *B)
526 {
527   Mat_MPIKAIJ *kaij = (Mat_MPIKAIJ*) A->data;
528 
529   PetscFunctionBegin;
530   *B = kaij->AIJ;
531   PetscFunctionReturn(0);
532 }
533 
534 PetscErrorCode MatSOR_SeqKAIJ(Mat A,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
535 {
536   PetscErrorCode    ierr;
537   Mat_SeqKAIJ       *kaij = (Mat_SeqKAIJ*) A->data;
538   Mat_SeqAIJ        *a = (Mat_SeqAIJ*)kaij->AIJ->data;
539   const PetscScalar *aa = a->a, *T = kaij->T, *v;
540   const PetscInt    m  = kaij->AIJ->rmap->n, *ai=a->i, *aj=a->j, p = kaij->p, q = kaij->q, *diag, *vi;
541   const PetscScalar *b, *xb, *idiag;
542   PetscScalar       *x, *work, *workt, *w, *y, *arr, *t, *arrt;
543   PetscInt          i, j, k, i2, bs, bs2, nz;
544 
545   PetscFunctionBegin;
546   its = its*lits;
547   if (flag & SOR_EISENSTAT) SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"No support yet for Eisenstat");
548   if (its <= 0)             SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
549   if (fshift)               SETERRQ (PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for diagonal shift");
550   if ((flag & SOR_APPLY_UPPER) || (flag & SOR_APPLY_LOWER)) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Sorry, no support for applying upper or lower triangular parts");
551   if (p != q) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSOR for KAIJ: Sorry, no support for non-square dense blocks");
552   else        {bs = p; bs2 = bs*bs; }
553 
554   if (!m) PetscFunctionReturn(0);
555 
556   if (!kaij->ibdiagvalid) { ierr = MatInvertBlockDiagonal_SeqKAIJ_N(A,NULL);CHKERRQ(ierr); }
557   idiag = kaij->ibdiag;
558   diag  = a->diag;
559 
560   if (!kaij->sor.setup) {
561     ierr = PetscMalloc5(bs,&kaij->sor.w,bs,&kaij->sor.y,m*bs,&kaij->sor.work,m*bs,&kaij->sor.t,m*bs2,&kaij->sor.arr);CHKERRQ(ierr);
562     kaij->sor.setup = PETSC_TRUE;
563   }
564   y     = kaij->sor.y;
565   w     = kaij->sor.w;
566   work  = kaij->sor.work;
567   t     = kaij->sor.t;
568   arr   = kaij->sor.arr;
569 
570   ierr = VecGetArray(xx,&x);    CHKERRQ(ierr);
571   ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
572 
573   if (flag & SOR_ZERO_INITIAL_GUESS) {
574     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
575       PetscKernel_w_gets_Ar_times_v(bs,bs,b,idiag,x);                            /* x[0:bs] <- D^{-1} b[0:bs] */
576       ierr   =  PetscMemcpy(t,b,bs*sizeof(PetscScalar));CHKERRQ(ierr);
577       i2     =  bs;
578       idiag  += bs2;
579       for (i=1; i<m; i++) {
580         v  = aa + ai[i];
581         vi = aj + ai[i];
582         nz = diag[i] - ai[i];
583 
584         if (T) {                /* b - T (Arow * x) */
585           for (k=0; k<bs; k++) w[k] = 0;
586           for (j=0; j<nz; j++) {
587             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
588           }
589           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs,w,T,&t[i2]);
590           for (k=0; k<bs; k++) t[i2+k] += b[i2+k];
591         } else if (kaij->isTI) {
592           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
593           for (j=0; j<nz; j++) {
594             for (k=0; k<bs; k++) t[i2+k] -= v[j] * x[vi[j]*bs+k];
595           }
596         } else {
597           for (k=0; k<bs; k++) t[i2+k] = b[i2+k];
598         }
599 
600         PetscKernel_w_gets_Ar_times_v(bs,bs,t+i2,idiag,y);
601         for (j=0; j<bs; j++) x[i2+j] = omega * y[j];
602 
603         idiag += bs2;
604         i2    += bs;
605       }
606       /* for logging purposes assume number of nonzero in lower half is 1/2 of total */
607       ierr = PetscLogFlops(1.0*bs2*a->nz);CHKERRQ(ierr);
608       xb = t;
609     } else xb = b;
610     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
611       idiag = kaij->ibdiag+bs2*(m-1);
612       i2    = bs * (m-1);
613       ierr  = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
614       PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,x+i2);
615       i2    -= bs;
616       idiag -= bs2;
617       for (i=m-2; i>=0; i--) {
618         v  = aa + diag[i] + 1 ;
619         vi = aj + diag[i] + 1;
620         nz = ai[i+1] - diag[i] - 1;
621 
622         if (T) {                /* FIXME: This branch untested */
623           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
624           /* copy all rows of x that are needed into contiguous space */
625           workt = work;
626           for (j=0; j<nz; j++) {
627             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
628             workt += bs;
629           }
630           arrt = arr;
631           for (j=0; j<nz; j++) {
632             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
633             for (k=0; k<bs2; k++) arrt[k] *= v[j];
634             arrt += bs2;
635           }
636           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
637         } else if (kaij->isTI) {
638           for (k=0; k<bs; k++) w[k] = t[i2+k];
639           for (j=0; j<nz; j++) {
640             for (k=0; k<bs; k++) w[k] -= v[j] * x[vi[j]*bs+k];
641           }
642         }
643 
644         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y); /* RHS incorrect for omega != 1.0 */
645         for (j=0; j<bs; j++) x[i2+j] = (1.0-omega) * x[i2+j] + omega * y[j];
646 
647         idiag -= bs2;
648         i2    -= bs;
649       }
650       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
651     }
652     its--;
653   }
654   while (its--) {               /* FIXME: This branch not updated */
655     if (flag & SOR_FORWARD_SWEEP || flag & SOR_LOCAL_FORWARD_SWEEP) {
656       i2     =  0;
657       idiag  = kaij->ibdiag;
658       for (i=0; i<m; i++) {
659         ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
660 
661         v  = aa + ai[i];
662         vi = aj + ai[i];
663         nz = diag[i] - ai[i];
664         workt = work;
665         for (j=0; j<nz; j++) {
666           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
667           workt += bs;
668         }
669         arrt = arr;
670         if (T) {
671           for (j=0; j<nz; j++) {
672             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
673             for (k=0; k<bs2; k++) arrt[k] *= v[j];
674             arrt += bs2;
675           }
676           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
677         } else if (kaij->isTI) {
678           for (j=0; j<nz; j++) {
679             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
680             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
681             arrt += bs2;
682           }
683           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
684         }
685         ierr = PetscMemcpy(t+i2,w,bs*sizeof(PetscScalar));CHKERRQ(ierr);
686 
687         v  = aa + diag[i] + 1;
688         vi = aj + diag[i] + 1;
689         nz = ai[i+1] - diag[i] - 1;
690         workt = work;
691         for (j=0; j<nz; j++) {
692           ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
693           workt += bs;
694         }
695         arrt = arr;
696         if (T) {
697           for (j=0; j<nz; j++) {
698             ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
699             for (k=0; k<bs2; k++) arrt[k] *= v[j];
700             arrt += bs2;
701           }
702           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
703         } else if (kaij->isTI) {
704           for (j=0; j<nz; j++) {
705             ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
706             for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
707             arrt += bs2;
708           }
709           PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
710         }
711 
712         PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
713         for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
714 
715         idiag += bs2;
716         i2    += bs;
717       }
718       xb = t;
719     }
720     else xb = b;
721     if (flag & SOR_BACKWARD_SWEEP || flag & SOR_LOCAL_BACKWARD_SWEEP) {
722       idiag = kaij->ibdiag+bs2*(m-1);
723       i2    = bs * (m-1);
724       if (xb == b) {
725         for (i=m-1; i>=0; i--) {
726           ierr = PetscMemcpy(w,b+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
727 
728           v  = aa + ai[i];
729           vi = aj + ai[i];
730           nz = diag[i] - ai[i];
731           workt = work;
732           for (j=0; j<nz; j++) {
733             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
734             workt += bs;
735           }
736           arrt = arr;
737           if (T) {
738             for (j=0; j<nz; j++) {
739               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
740               for (k=0; k<bs2; k++) arrt[k] *= v[j];
741               arrt += bs2;
742             }
743             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
744           } else if (kaij->isTI) {
745             for (j=0; j<nz; j++) {
746               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
747               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
748               arrt += bs2;
749             }
750             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
751           }
752 
753           v  = aa + diag[i] + 1;
754           vi = aj + diag[i] + 1;
755           nz = ai[i+1] - diag[i] - 1;
756           workt = work;
757           for (j=0; j<nz; j++) {
758             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
759             workt += bs;
760           }
761           arrt = arr;
762           if (T) {
763             for (j=0; j<nz; j++) {
764               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
765               for (k=0; k<bs2; k++) arrt[k] *= v[j];
766               arrt += bs2;
767             }
768             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
769           } else if (kaij->isTI) {
770             for (j=0; j<nz; j++) {
771               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
772               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
773               arrt += bs2;
774             }
775             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
776           }
777 
778           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
779           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
780         }
781       } else {
782         for (i=m-1; i>=0; i--) {
783           ierr = PetscMemcpy(w,xb+i2,bs*sizeof(PetscScalar));CHKERRQ(ierr);
784           v  = aa + diag[i] + 1;
785           vi = aj + diag[i] + 1;
786           nz = ai[i+1] - diag[i] - 1;
787           workt = work;
788           for (j=0; j<nz; j++) {
789             ierr   = PetscMemcpy(workt,x + bs*(*vi++),bs*sizeof(PetscScalar));CHKERRQ(ierr);
790             workt += bs;
791           }
792           arrt = arr;
793           if (T) {
794             for (j=0; j<nz; j++) {
795               ierr  = PetscMemcpy(arrt,T,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
796               for (k=0; k<bs2; k++) arrt[k] *= v[j];
797               arrt += bs2;
798             }
799             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
800           } else if (kaij->isTI) {
801             for (j=0; j<nz; j++) {
802               ierr = PetscMemzero(arrt,bs2*sizeof(PetscScalar));CHKERRQ(ierr);
803               for (k=0; k<bs; k++) arrt[k+bs*k] = v[j];
804               arrt += bs2;
805             }
806             PetscKernel_w_gets_w_minus_Ar_times_v(bs,bs*nz,w,arr,work);
807           }
808           PetscKernel_w_gets_Ar_times_v(bs,bs,w,idiag,y);
809           for (j=0; j<bs; j++) *(x+i2+j) = (1.0-omega) * *(x+i2+j) + omega * *(y+j);
810         }
811         idiag -= bs2;
812         i2    -= bs;
813       }
814       ierr = PetscLogFlops(1.0*bs2*(a->nz));CHKERRQ(ierr);
815     }
816   }
817 
818   ierr = VecRestoreArray(xx,&x);    CHKERRQ(ierr);
819   ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
820   PetscFunctionReturn(0);
821 }
822 
823 /*===================================================================================*/
824 
825 PetscErrorCode KAIJMultAdd_MPI(Mat A,Vec xx,Vec yy,Vec zz)
826 {
827   Mat_MPIKAIJ    *b = (Mat_MPIKAIJ*)A->data;
828   PetscErrorCode ierr;
829 
830   PetscFunctionBegin;
831   if (!yy) {
832     ierr = VecSet(zz,0.0);CHKERRQ(ierr);
833   } else {
834     ierr = VecCopy(yy,zz);CHKERRQ(ierr);
835   }
836   /* start the scatter */
837   ierr = VecScatterBegin(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
838   ierr = (*b->AIJ->ops->multadd)(b->AIJ,xx,zz,zz);CHKERRQ(ierr);
839   ierr = VecScatterEnd(b->ctx,xx,b->w,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
840   ierr = (*b->OAIJ->ops->multadd)(b->OAIJ,b->w,zz,zz);CHKERRQ(ierr);
841   PetscFunctionReturn(0);
842 }
843 
844 PetscErrorCode MatMult_MPIKAIJ_dof(Mat A,Vec xx,Vec yy)
845 {
846   PetscErrorCode ierr;
847   PetscFunctionBegin;
848   ierr = KAIJMultAdd_MPI(A,xx,PETSC_NULL,yy);CHKERRQ(ierr);
849   PetscFunctionReturn(0);
850 }
851 
852 PetscErrorCode MatMultAdd_MPIKAIJ_dof(Mat A,Vec xx,Vec yy, Vec zz)
853 {
854   PetscErrorCode ierr;
855   PetscFunctionBegin;
856   ierr = KAIJMultAdd_MPI(A,xx,yy,zz);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 PetscErrorCode MatInvertBlockDiagonal_MPIKAIJ_dof(Mat A,const PetscScalar **values)
861 {
862   Mat_MPIKAIJ     *b = (Mat_MPIKAIJ*)A->data;
863   PetscErrorCode  ierr;
864 
865   PetscFunctionBegin;
866   ierr = (*b->AIJ->ops->invertblockdiagonal)(b->AIJ,values);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 /* ----------------------------------------------------------------*/
871 
872 PetscErrorCode MatGetRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
873 {
874   Mat_SeqKAIJ     *b   = (Mat_SeqKAIJ*) A->data;
875   PetscErrorCode  diag = PETSC_FALSE;
876   PetscErrorCode  ierr;
877   PetscInt        nzaij,nz,*colsaij,*idx,i,j,p=b->p,q=b->q,r=row/p,s=row%p,c;
878   PetscScalar     *vaij,*v,*S=b->S,*T=b->T;
879 
880   PetscFunctionBegin;
881   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
882   b->getrowactive = PETSC_TRUE;
883   if (row < 0 || row >= A->rmap->n) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row %D out of range",row);
884 
885   if ((!S) && (!T) && (!b->isTI)) {
886     if (ncols)    *ncols  = 0;
887     if (cols)     *cols   = NULL;
888     if (values)   *values = NULL;
889     PetscFunctionReturn(0);
890   }
891 
892   if (T || b->isTI) {
893     ierr  = MatGetRow_SeqAIJ(b->AIJ,r,&nzaij,&colsaij,&vaij);CHKERRQ(ierr);
894     c     = nzaij;
895     for (i=0; i<nzaij; i++) {
896       /* check if this row contains a diagonal entry */
897       if (colsaij[i] == r) {
898         diag = PETSC_TRUE;
899         c = i;
900       }
901     }
902   } else nzaij = c = 0;
903 
904   /* calculate size of row */
905   nz = 0;
906   if (S)            nz += q;
907   if (T || b->isTI) nz += (diag && S ? (nzaij-1)*q : nzaij*q);
908 
909   if (cols || values) {
910     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
911     for (i=0; i<q; i++) {
912       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
913       v[i] = 0.0;
914     }
915     if (b->isTI) {
916       for (i=0; i<nzaij; i++) {
917         for (j=0; j<q; j++) {
918           idx[i*q+j] = colsaij[i]*q+j;
919           v[i*q+j]   = (j==s ? vaij[i] : 0);
920         }
921       }
922     } else if (T) {
923       for (i=0; i<nzaij; i++) {
924         for (j=0; j<q; j++) {
925           idx[i*q+j] = colsaij[i]*q+j;
926           v[i*q+j]   = vaij[i]*T[s+j*p];
927         }
928       }
929     }
930     if (S) {
931       for (j=0; j<q; j++) {
932         idx[c*q+j] = r*q+j;
933         v[c*q+j]  += S[s+j*p];
934       }
935     }
936   }
937 
938   if (ncols)    *ncols  = nz;
939   if (cols)     *cols   = idx;
940   if (values)   *values = v;
941 
942   PetscFunctionReturn(0);
943 }
944 
945 PetscErrorCode MatRestoreRow_SeqKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
946 {
947   PetscErrorCode ierr;
948   PetscFunctionBegin;
949   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
950   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
951   PetscFunctionReturn(0);
952 }
953 
954 PetscErrorCode MatGetRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *ncols,PetscInt **cols,PetscScalar **values)
955 {
956   Mat_MPIKAIJ     *b      = (Mat_MPIKAIJ*) A->data;
957   Mat             MatAIJ  = ((Mat_SeqKAIJ*)b->AIJ->data)->AIJ;
958   Mat             MatOAIJ = ((Mat_SeqKAIJ*)b->OAIJ->data)->AIJ;
959   Mat             AIJ     = b->A;
960   PetscBool       diag    = PETSC_FALSE;
961   PetscErrorCode  ierr;
962   const PetscInt  rstart=A->rmap->rstart,rend=A->rmap->rend,p=b->p,q=b->q,*garray;
963   PetscInt        nz,*idx,ncolsaij,ncolsoaij,*colsaij,*colsoaij,r,s,c,i,j,lrow;
964   PetscScalar     *v,*vals,*ovals,*S=b->S,*T=b->T;
965 
966   PetscFunctionBegin;
967   if (b->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
968   b->getrowactive = PETSC_TRUE;
969   if (row < rstart || row >= rend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Only local rows");
970   lrow = row - rstart;
971 
972   if ((!S) && (!T) && (!b->isTI)) {
973     if (ncols)    *ncols  = 0;
974     if (cols)     *cols   = NULL;
975     if (values)   *values = NULL;
976     PetscFunctionReturn(0);
977   }
978 
979   r = lrow/p;
980   s = lrow%p;
981 
982   if (T || b->isTI) {
983     ierr = MatMPIAIJGetSeqAIJ(AIJ,NULL,NULL,&garray);
984     ierr = MatGetRow_SeqAIJ(MatAIJ,lrow/p,&ncolsaij,&colsaij,&vals);CHKERRQ(ierr);
985     ierr = MatGetRow_SeqAIJ(MatOAIJ,lrow/p,&ncolsoaij,&colsoaij,&ovals);CHKERRQ(ierr);
986 
987     c     = ncolsaij + ncolsoaij;
988     for (i=0; i<ncolsaij; i++) {
989       /* check if this row contains a diagonal entry */
990       if (colsaij[i] == r) {
991         diag = PETSC_TRUE;
992         c = i;
993       }
994     }
995   } else c = 0;
996 
997   /* calculate size of row */
998   nz = 0;
999   if (S)            nz += q;
1000   if (T || b->isTI) nz += (diag && S ? (ncolsaij+ncolsoaij-1)*q : (ncolsaij+ncolsoaij)*q);
1001 
1002   if (cols || values) {
1003     ierr = PetscMalloc2(nz,&idx,nz,&v);CHKERRQ(ierr);
1004     for (i=0; i<q; i++) {
1005       /* We need to initialize the v[i] to zero to handle the case in which T is NULL (not the identity matrix). */
1006       v[i] = 0.0;
1007     }
1008     if (b->isTI) {
1009       for (i=0; i<ncolsaij; i++) {
1010         for (j=0; j<q; j++) {
1011           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1012           v[i*q+j]   = (j==s ? vals[i] : 0.0);
1013         }
1014       }
1015       for (i=0; i<ncolsoaij; i++) {
1016         for (j=0; j<q; j++) {
1017           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1018           v[(i+ncolsaij)*q+j]   = (j==s ? ovals[i]: 0.0);
1019         }
1020       }
1021     } else if (T) {
1022       for (i=0; i<ncolsaij; i++) {
1023         for (j=0; j<q; j++) {
1024           idx[i*q+j] = (colsaij[i]+rstart/p)*q+j;
1025           v[i*q+j]   = vals[i]*T[s+j*p];
1026         }
1027       }
1028       for (i=0; i<ncolsoaij; i++) {
1029         for (j=0; j<q; j++) {
1030           idx[(i+ncolsaij)*q+j] = garray[colsoaij[i]]*q+j;
1031           v[(i+ncolsaij)*q+j]   = ovals[i]*T[s+j*p];
1032         }
1033       }
1034     }
1035     if (S) {
1036       for (j=0; j<q; j++) {
1037         idx[c*q+j] = (r+rstart/p)*q+j;
1038         v[c*q+j]  += S[s+j*p];
1039       }
1040     }
1041   }
1042 
1043   if (ncols)  *ncols  = nz;
1044   if (cols)   *cols   = idx;
1045   if (values) *values = v;
1046   PetscFunctionReturn(0);
1047 }
1048 
1049 PetscErrorCode MatRestoreRow_MPIKAIJ(Mat A,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1050 {
1051   PetscErrorCode ierr;
1052   PetscFunctionBegin;
1053   ierr = PetscFree2(*idx,*v);CHKERRQ(ierr);
1054   ((Mat_SeqKAIJ*)A->data)->getrowactive = PETSC_FALSE;
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 PetscErrorCode  MatCreateSubMatrix_KAIJ(Mat mat,IS isrow,IS iscol,MatReuse cll,Mat *newmat)
1059 {
1060   PetscErrorCode ierr;
1061   Mat            A;
1062 
1063   PetscFunctionBegin;
1064   ierr = MatConvert(mat,MATAIJ,MAT_INITIAL_MATRIX,&A);CHKERRQ(ierr);
1065   ierr = MatCreateSubMatrix(A,isrow,iscol,cll,newmat);CHKERRQ(ierr);
1066   ierr = MatDestroy(&A);CHKERRQ(ierr);
1067   PetscFunctionReturn(0);
1068 }
1069 
1070 /* ---------------------------------------------------------------------------------- */
1071 /*@C
1072   MatCreateKAIJ - Creates a matrix type to be used for matrices of the following form:
1073 
1074     [I \otimes S + A \otimes T]
1075 
1076   where
1077     S is a dense (p \times q) matrix
1078     T is a dense (p \times q) matrix
1079     A is an AIJ  (n \times n) matrix
1080     I is the identity matrix
1081   The resulting matrix is (np \times nq)
1082 
1083   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1084   S is always stored independently on all processes as a PetscScalar array in column-major format.
1085 
1086   Collective
1087 
1088   Input Parameters:
1089 + A - the AIJ matrix
1090 . S - the S matrix, stored as a PetscScalar array (column-major)
1091 . T - the T matrix, stored as a PetscScalar array (column-major)
1092 . p - number of rows in S and T
1093 - q - number of columns in S and T
1094 
1095   Output Parameter:
1096 . kaij - the new KAIJ matrix
1097 
1098   Operations provided:
1099 + MatMult
1100 . MatMultAdd
1101 . MatInvertBlockDiagonal
1102 - MatView
1103 
1104   Level: advanced
1105 
1106 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MATKAIJ
1107 @*/
1108 PetscErrorCode  MatCreateKAIJ(Mat A,PetscInt p,PetscInt q,const PetscScalar S[],const PetscScalar T[],Mat *kaij)
1109 {
1110   PetscErrorCode ierr;
1111   PetscMPIInt    size;
1112 
1113   PetscFunctionBegin;
1114   ierr = MatCreate(PetscObjectComm((PetscObject)A),kaij);CHKERRQ(ierr);
1115   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1116   if (size == 1) {
1117     ierr = MatSetType(*kaij,MATSEQKAIJ);CHKERRQ(ierr);
1118   } else {
1119     ierr = MatSetType(*kaij,MATMPIKAIJ);CHKERRQ(ierr);
1120   }
1121   ierr = MatKAIJSetAIJ(*kaij,A);CHKERRQ(ierr);
1122   ierr = MatKAIJSetS(*kaij,p,q,S);CHKERRQ(ierr);
1123   ierr = MatKAIJSetT(*kaij,p,q,T);CHKERRQ(ierr);
1124   ierr = MatSetUp(*kaij);
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 /*MC
1129   MATKAIJ - MATKAIJ = "kaij" - A matrix type to be used to evaluate matrices of the following form:
1130 
1131     [I \otimes S + A \otimes T]
1132 
1133   where
1134     S is a dense (p \times q) matrix
1135     T is a dense (p \times q) matrix
1136     A is an AIJ  (n \times n) matrix
1137     I is the identity matrix
1138   The resulting matrix is (np \times nq)
1139 
1140   The matrix type is based on MATSEQAIJ for a sequential matrix A, and MATMPIAIJ for a distributed matrix A.
1141   S and T are always stored independently on all processes as a PetscScalar array in column-major format.
1142 
1143   Operations provided:
1144 + MatMult
1145 . MatMultAdd
1146 . MatInvertBlockDiagonal
1147 - MatView
1148 
1149   Level: advanced
1150 
1151 .seealso: MatKAIJSetAIJ(), MatKAIJSetS(), MatKAIJSetT(), MatKAIJGetAIJ(), MatKAIJGetS(), MatKAIJGetT(), MatCreateKAIJ()
1152 M*/
1153 
1154 PETSC_EXTERN PetscErrorCode MatCreate_KAIJ(Mat A)
1155 {
1156   PetscErrorCode ierr;
1157   Mat_MPIKAIJ    *b;
1158   PetscMPIInt    size;
1159 
1160   PetscFunctionBegin;
1161   ierr     = PetscNewLog(A,&b);CHKERRQ(ierr);
1162   A->data  = (void*)b;
1163 
1164   ierr = PetscMemzero(A->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1165 
1166   A->ops->setup = MatSetUp_KAIJ;
1167 
1168   b->w    = 0;
1169   ierr    = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
1170   if (size == 1) {
1171     ierr = PetscObjectChangeTypeName((PetscObject)A,MATSEQKAIJ);CHKERRQ(ierr);
1172 
1173     A->ops->setup               = MatSetUp_KAIJ;
1174     A->ops->destroy             = MatDestroy_SeqKAIJ;
1175     A->ops->view                = MatView_SeqKAIJ;
1176     A->ops->mult                = MatMult_SeqKAIJ_N;
1177     A->ops->multadd             = MatMultAdd_SeqKAIJ_N;
1178     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_SeqKAIJ_N;
1179     A->ops->getrow              = MatGetRow_SeqKAIJ;
1180     A->ops->restorerow          = MatRestoreRow_SeqKAIJ;
1181     A->ops->sor                 = MatSOR_SeqKAIJ;
1182   } else {
1183     ierr = PetscObjectChangeTypeName((PetscObject)A,MATMPIKAIJ);CHKERRQ(ierr);
1184     A->ops->setup               = MatSetUp_KAIJ;
1185     A->ops->destroy             = MatDestroy_MPIKAIJ;
1186     A->ops->view                = MatView_MPIKAIJ;
1187     A->ops->mult                = MatMult_MPIKAIJ_dof;
1188     A->ops->multadd             = MatMultAdd_MPIKAIJ_dof;
1189     A->ops->invertblockdiagonal = MatInvertBlockDiagonal_MPIKAIJ_dof;
1190     A->ops->getrow              = MatGetRow_MPIKAIJ;
1191     A->ops->restorerow          = MatRestoreRow_MPIKAIJ;
1192     ierr = PetscObjectComposeFunction((PetscObject)A,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPIKAIJ);CHKERRQ(ierr);
1193   }
1194   A->ops->createsubmatrix = MatCreateSubMatrix_KAIJ;
1195   PetscFunctionReturn(0);
1196 }
1197